### With predicted rental prices, we are now interested in the growth rates for each SA2

In [7]:
import pandas as pd
import glob
import os

# Folder holding the XGBoost rental-price prediction CSVs.
path = r'../data/curated/2023_2027_rental_prediction_xgboost/' # use your path
all_files = glob.glob(os.path.join(path , "*.csv"))

# Read every CSV in sorted filename order so the stacked frame is deterministic.
li = [
    pd.read_csv(csv_file, index_col=None, header=0)
    for csv_file in sorted(all_files)
]

# Stack all files, then average the predicted price per (year, SA2) pair.
merged_df = pd.concat(li, axis=0, ignore_index=True)
predicted = (
    merged_df
    .groupby(['year', 'sa2_2021'], as_index=False)['predicted_price']
    .mean()
)
predicted

Unnamed: 0,year,sa2_2021,predicted_price
0,2023,201011001,412.941470
1,2023,201011002,464.353273
2,2023,201011005,375.598196
3,2023,201011006,415.771877
4,2023,201011007,497.931755
...,...,...,...
2490,2027,217031476,405.364614
2491,2027,217041477,387.895111
2492,2027,217041478,629.722419
2493,2027,217041479,377.377171


In [8]:
# Mean weekly rent per (year, SA2) from the 2022 merged data.
# Rows with any missing value are dropped before averaging, and the rent column
# is renamed to 'predicted_price' so it shares the column name of `predicted`.
rental_2022 = (
    pd.read_csv('../data/curated/merged_dataset/2022_merged_data.csv')
    .dropna()
    .groupby(['year', 'sa2_2021'], as_index=False)
    .agg({'weekly_rent': 'mean'})
    .rename(columns={'weekly_rent': 'predicted_price'})
)
# Sanity check (run manually if needed): compare the sets of unique sa2_2021
# values in rental_2022 and predicted to spot SA2s present in only one of them.
rental_2022

Unnamed: 0,year,sa2_2021,predicted_price
0,2022,201011001,437.468013
1,2022,201011002,421.270053
2,2022,201011005,402.559524
3,2022,201011006,440.000000
4,2022,201011007,530.625000
...,...,...,...
494,2022,217031476,518.750000
495,2022,217041477,414.069767
496,2022,217041478,771.301370
497,2022,217041479,437.025641


In [9]:
# Stack the 2022 averages on top of the predicted years, with a fresh 0..n-1 index.
growth_df = pd.concat([rental_2022, predicted], axis=0, ignore_index=True)
growth_df

Unnamed: 0,year,sa2_2021,predicted_price
0,2022,201011001,437.468013
1,2022,201011002,421.270053
2,2022,201011005,402.559524
3,2022,201011006,440.000000
4,2022,201011007,530.625000
...,...,...,...
2989,2027,217031476,405.364614
2990,2027,217041477,387.895111
2991,2027,217041478,629.722419
2992,2027,217041479,377.377171


In [10]:
# Growth rate of the mean price for every SA2.
# growth_dict maps each sa2_2021 code to a list of rates, one per pair of
# consecutive rows for that SA2:
#     (current price - previous price) / previous price

# Order rows by year explicitly so the pairing below does not depend on the
# order in which the yearly frames happened to be concatenated. The stable
# sort keeps the existing relative order of rows that share a year.
growth_by_year = growth_df.sort_values('year', kind='stable')

growth_dict = dict()
# A single groupby pass replaces re-filtering the whole frame once per SA2;
# sort=False keeps the SA2 keys in first-appearance order.
for sa2, prices in growth_by_year.groupby('sa2_2021', sort=False)['predicted_price']:
    # diff() is (current - previous) and shift() is the previous price; the
    # first row of each SA2 has no predecessor, so its NaN entry is dropped.
    growth_dict[sa2] = prices.diff().div(prices.shift()).iloc[1:].tolist()
# NOTE(review): rows are paired by position, not by calendar year. If an SA2
# is missing a year, the rate spans the gap - confirm every SA2 has all years.
# An SA2 with a single row gets an empty list.


In [11]:
# Inspect growth_dict: each SA2 code maps to its list of growth rates.
growth_dict

{201011001: [-0.05606476979557922,
  0.018836277208971172,
  0.0,
  0.05935411550411792,
  0.0],
 201011002: [0.10226983634798294,
  0.0,
  0.0,
  0.04573879751033222,
  0.09529048620199557],
 201011005: [-0.06697476171817233,
  0.003175729113967485,
  -0.006227195683867107,
  0.03819970197597374,
  0.0],
 201011006: [-0.0550639163223141,
  0.02605983465393088,
  0.022205887639497025,
  0.0,
  0.01854021232596706],
 201011007: [-0.06161271142520611,
  0.002745024767500479,
  0.022643074157289333,
  -0.021368300610191793,
  0.023499780365363945],
 201011008: [-0.05216794615186039,
  0.0023446678267555793,
  0.023478725622334246,
  0.0022607047272406116,
  0.010990237237750644],
 201011481: [0.04478370284449917,
  0.026245826710122024,
  0.025039927129935895,
  0.010647495785364716,
  0.0],
 201011482: [0.003717588321006474,
  0.017432717079111582,
  0.013916404078333102,
  0.013547110571090022,
  0.0],
 201011483: [-0.0024411989098745246,
  0.0015781258707405674,
  0.0442282235114506,
 

In [12]:
import numpy as np
# Collapse each SA2's list of growth rates into its arithmetic mean.
# The comprehension is fully built before update() runs, so growth_dict is
# overwritten in place: values change from lists of rates to a single number.
growth_dict.update({sa2: np.mean(rates) for sa2, rates in growth_dict.items()})
growth_dict

{201011001: 0.004425124583501974,
 201011002: 0.04865982401206214,
 201011005: -0.0063653052624196415,
 201011006: 0.002348403659416172,
 201011007: -0.0068186265490488285,
 201011008: -0.002618722147555862,
 201011481: 0.02134339049398436,
 201011482: 0.009722764009908237,
 201011483: 0.014488271653872014,
 201011484: 0.01148127642063556,
 201021009: 0.012541201979291284,
 201021010: 0.010693270776630136,
 201021011: -0.01172124072133186,
 201021012: 0.011788731961075487,
 201031013: 0.012451738110760977,
 201031014: 0.008429730267167704,
 201031015: -0.004459717657819637,
 201031016: 0.017374781169305527,
 201031017: 0.010026505461018615,
 202011018: 0.015616857157043867,
 202011019: -0.025005510416012216,
 202011020: 0.010891987664425307,
 202011021: 0.0036917515063110036,
 202011022: 0.02738623338159272,
 202011023: -0.010623063763087614,
 202011024: -0.0005076823553966074,
 202011025: -0.004409474762736928,
 202021026: -0.00791004734148989,
 202021027: 0.021311544312744832,
 20202