In [1]:
import pandas as pd
import json

from tqdm.auto import tqdm
tqdm.pandas()


import sys, os
sys.path.append(os.path.abspath('../'))
from scripts.utils import create_dir, get_runtime
import time
start_time = time.time()


  from .autonotebook import tqdm as notebook_tqdm


## Preparing the Historical Dataset (2017-2023)

In [4]:
rental_hist_df = pd.read_csv('../data/curated/rental_data_cleaned.csv')
rental_hist_df.rename(columns={'sa2_code_left': 'sa2_code'}, inplace=True)
rental_hist_df = rental_hist_df[
    [
        'address', 'suburb', 'bed', 'bath', 'car', 'type', 'year', 'sa2_code',
        'median_income', 'population', 'cpi', 'unemployment_rate',
        'lat', 'lng',  'geometry', 'rented_price'
    ]
]
# strip only beginning whitespace character in suburb
rental_hist_df['suburb'] = rental_hist_df['suburb'].str.lstrip()
# convert sa2_code to string
rental_hist_df['sa2_code'] = rental_hist_df['sa2_code'].astype('Int64').astype(str)
# remove type that is not house or unit
rental_hist_df = rental_hist_df[
    rental_hist_df['type'].isin(['House', 'Unit/apmt'])
]
rental_hist_df

Unnamed: 0,address,suburb,bed,bath,car,type,year,sa2_code,median_income,population,cpi,unemployment_rate,lat,lng,geometry,rented_price
0,"201/560 LONSDALE STREET, MELBOURNE",MELBOURNE,2.0,2.0,1.0,Unit/apmt,2023,206041505,44492.4005,20027.0,5.6,3.691667,-37.813730,144.955580,POINT (144.95558001 -37.81373001),800
1,"201/560 LONSDALE STREET, MELBOURNE",MELBOURNE,2.0,2.0,1.0,Unit/apmt,2023,206041505,44492.4005,20027.0,5.6,3.691667,-37.813730,144.955580,POINT (144.95558001 -37.81373001),800
2,"201/560 LONSDALE STREET, MELBOURNE",MELBOURNE,2.0,2.0,1.0,Unit/apmt,2021,206041505,39300.0000,16098.0,3.5,4.200000,-37.813730,144.955580,POINT (144.95558001 -37.81373001),540
3,"1702/560 LONSDALE STREET, MELBOURNE",MELBOURNE,2.0,1.0,0.0,Unit/apmt,2023,206041505,44492.4005,20027.0,5.6,3.691667,-37.813730,144.955580,POINT (144.95558001 -37.81373001),720
4,"1702/560 LONSDALE STREET, MELBOURNE",MELBOURNE,2.0,1.0,0.0,Unit/apmt,2023,206041505,44492.4005,20027.0,5.6,3.691667,-37.813730,144.955580,POINT (144.95558001 -37.81373001),650
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1531550,"2 WHITECAPS AVENUE, POINT COOK",POINT COOK,4.0,2.0,2.0,House,2019,213051466,59244.0000,19047.0,1.8,5.175000,-37.907750,144.735372,POINT (144.7353721002 -37.90775002),520
1531551,"96 BOARDWALK BOULEVARD, POINT COOK",POINT COOK,3.0,2.0,2.0,Unit/apmt,2019,213051581,57433.0000,9739.0,1.8,5.175000,-37.883631,144.732710,POINT (144.73271 -37.883631),370
1531552,"96 BOARDWALK BOULEVARD, POINT COOK",POINT COOK,3.0,2.0,2.0,Unit/apmt,2019,213051581,57433.0000,9739.0,1.8,5.175000,-37.883631,144.732710,POINT (144.73271 -37.883631),370
1531553,"96 BOARDWALK BOULEVARD, POINT COOK",POINT COOK,3.0,2.0,2.0,Unit/apmt,2019,213051581,57433.0000,9739.0,1.8,5.175000,-37.883631,144.732710,POINT (144.73271 -37.883631),370


In [3]:
proximity_hist_df = pd.read_csv('../data/raw/proximity/time_city.csv')
proximity_hist_df

Unnamed: 0,lat,lng,city_index,time_city
0,-37.813730,144.955580,0,263.3
1,-37.810280,144.956670,0,210.9
2,-37.813590,144.955720,0,232.5
3,-37.813700,144.953930,0,228.1
4,-37.808826,144.959549,0,225.9
...,...,...,...,...
447141,-37.896543,144.723878,0,1962.8
447142,-37.913624,144.760232,0,2271.4
447143,-37.915965,144.780830,0,2484.2
447144,-37.896490,144.741029,0,1963.3


In [4]:
price_df = pd.read_csv('../data/raw/property-buy-sell/oldlistings_buy_avg.csv')
price_hist_df = price_df[
    (price_df['year'] >= 2017) &
    (price_df['year'] <= 2023)
]
price_hist_df

Unnamed: 0,year,suburb,avg_property_price
13957,2017,ABBOTSFORD,4.848551e+05
13958,2017,ABERFELDIE,1.229353e+06
13959,2017,ACHERON,5.990000e+05
13960,2017,AChE CREEK,1.140000e+06
13961,2017,AChEDALE,5.540000e+05
...,...,...,...
23553,2023,YELTA,4.250000e+05
23554,2023,YENDON,6.590000e+05
23555,2023,YINNAR,6.017227e+05
23556,2023,YINNAR SOUTH,7.700000e+05


Join historical

In [5]:
hist_df = rental_hist_df.merge(proximity_hist_df, on=['lat', 'lng'], how='inner')
hist_df = hist_df.merge(price_hist_df, on=['suburb', 'year'], how='inner')
hist_df

Unnamed: 0,address,suburb,bed,bath,car,type,year,sa2_code,median_income,population,cpi,unemployment_rate,lat,lng,geometry,rented_price,city_index,time_city,avg_property_price
0,"201/560 LONSDALE STREET, MELBOURNE",MELBOURNE,2.0,2.0,1.0,Unit/apmt,2023,206041505,44492.4005,20027.0,5.6,3.691667,-37.813730,144.955580,POINT (144.95558001 -37.81373001),800,0,263.3,682488.770000
1,"201/560 LONSDALE STREET, MELBOURNE",MELBOURNE,2.0,2.0,1.0,Unit/apmt,2023,206041505,44492.4005,20027.0,5.6,3.691667,-37.813730,144.955580,POINT (144.95558001 -37.81373001),800,0,263.3,682488.770000
2,"201/560 LONSDALE STREET, MELBOURNE",MELBOURNE,2.0,2.0,1.0,Unit/apmt,2021,206041505,39300.0000,16098.0,3.5,4.200000,-37.813730,144.955580,POINT (144.95558001 -37.81373001),540,0,263.3,619543.745192
3,"1702/560 LONSDALE STREET, MELBOURNE",MELBOURNE,2.0,1.0,0.0,Unit/apmt,2023,206041505,44492.4005,20027.0,5.6,3.691667,-37.813730,144.955580,POINT (144.95558001 -37.81373001),720,0,263.3,682488.770000
4,"1702/560 LONSDALE STREET, MELBOURNE",MELBOURNE,2.0,1.0,0.0,Unit/apmt,2023,206041505,44492.4005,20027.0,5.6,3.691667,-37.813730,144.955580,POINT (144.95558001 -37.81373001),650,0,263.3,682488.770000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1230979,"2 WHITECAPS AVENUE, POINT COOK",POINT COOK,4.0,2.0,2.0,House,2019,213051466,59244.0000,19047.0,1.8,5.175000,-37.907750,144.735372,POINT (144.7353721002 -37.90775002),520,0,2111.5,614604.919431
1230980,"96 BOARDWALK BOULEVARD, POINT COOK",POINT COOK,3.0,2.0,2.0,Unit/apmt,2019,213051581,57433.0000,9739.0,1.8,5.175000,-37.883631,144.732710,POINT (144.73271 -37.883631),370,0,1775.8,614604.919431
1230981,"96 BOARDWALK BOULEVARD, POINT COOK",POINT COOK,3.0,2.0,2.0,Unit/apmt,2019,213051581,57433.0000,9739.0,1.8,5.175000,-37.883631,144.732710,POINT (144.73271 -37.883631),370,0,1775.8,614604.919431
1230982,"96 BOARDWALK BOULEVARD, POINT COOK",POINT COOK,3.0,2.0,2.0,Unit/apmt,2019,213051581,57433.0000,9739.0,1.8,5.175000,-37.883631,144.732710,POINT (144.73271 -37.883631),370,0,1775.8,614604.919431


## Preparing the Current Dataset (2024)

In [6]:
rental_curr_df = pd.read_csv('../data/raw/rental/rental_data_2024.csv')
#rename
rental_curr_df = rental_curr_df.rename(columns={
    'cpi_x': 'cpi',
    'Statistical Areas Level 2 2021 name': 'sa2_name'
})
rental_curr_df = rental_curr_df[
    [
        'address', 'suburb', 'bed', 'bath', 'car', 'type', 'year', 'sa2_code',
        'median_income', 'population', 'cpi', 'unemployment_rate',
        'lat', 'lng',  'geometry', 'rented_price'
    ]
]
# strip only beginning whitespace character in suburb
rental_curr_df['suburb'] = rental_curr_df['suburb'].str.lstrip()
# convert sa2_code to string
rental_curr_df['sa2_code'] = rental_curr_df['sa2_code'].astype('Int64').astype(str)
# remove type that is not house or unit
rental_curr_df = rental_curr_df[
    rental_curr_df['type'].isin(['House', 'Unit/apmt'])
]
rental_curr_df

Unnamed: 0,address,suburb,bed,bath,car,type,year,sa2_code,median_income,population,cpi,unemployment_rate,lat,lng,geometry,rented_price
0,"201/560 LONSDALE STREET, MELBOURNE",MELBOURNE,2.0,2.0,1.0,Unit/apmt,2024,206041505,42430.858339,20162,3.53,4.014286,-37.813730,144.955580,POINT (144.95558001 -37.81373001),730
1,"901/21 HEALEYS LANE, MELBOURNE",MELBOURNE,2.0,1.0,0.0,Unit/apmt,2024,206041505,42430.858339,20162,3.53,4.014286,-37.813590,144.955720,POINT (144.95572001 -37.81359001),720
2,"3801/601 LITTLE LONSDALE STREET, MELBOURNE",MELBOURNE,2.0,2.0,1.0,Unit/apmt,2024,206041505,42430.858339,20162,3.53,4.014286,-37.813700,144.953930,POINT (144.95393001 -37.8137001),800
3,"1808/318 RUSSELL STREET, MELBOURNE",MELBOURNE,2.0,2.0,0.0,Unit/apmt,2024,206041503,42412.280478,11840,3.53,4.014286,-37.809259,144.966567,POINT (144.9665671 -37.809259),840
4,"707/77 QUEENS ROAD, MELBOURNE",MELBOURNE,2.0,2.0,1.0,Unit/apmt,2024,206051514,66910.824282,15041,3.53,4.014286,-37.852740,144.979740,POINT (144.97974001 -37.85274001),680
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106397,"7 ROSSETTI COURT, PAKENHAM",PAKENHAM,3.0,1.0,0.0,House,2024,212011552,55683.420715,28466,3.53,4.014286,-38.069069,145.463520,POINT (145.46352 -38.069069),520
106398,"7 DISCOVERY STREET, PAKENHAM",PAKENHAM,3.0,2.0,2.0,House,2024,212011551,57772.670788,7044,3.53,4.014286,-38.078518,145.482727,POINT (145.482727 -38.078518),500
106399,"7 DISCOVERY STREET, PAKENHAM",PAKENHAM,3.0,2.0,2.0,House,2024,212011551,57772.670788,7044,3.53,4.014286,-38.078518,145.482727,POINT (145.482727 -38.078518),500
106400,"19 PORTILLO CRESCENT, PAKENHAM",PAKENHAM,4.0,2.0,2.0,Unit/apmt,2024,212011550,58924.808098,9820,3.53,4.014286,-38.052810,145.454962,POINT (145.4549617002 -38.0528099002),580


In [7]:
proximity_curr_df = pd.read_csv('../data/raw/proximity/time_city_2024.csv')
proximity_curr_df

Unnamed: 0,lat,lng,city_index,time_city
0,-37.813730,144.955580,0,263.3
1,-37.813590,144.955720,0,232.5
2,-37.813700,144.953930,0,228.1
3,-37.809259,144.966567,0,124.0
4,-37.852740,144.979740,0,650.5
...,...,...,...,...
74904,-38.056386,145.475653,0,3302.3
74905,-38.081892,145.445978,0,2994.1
74906,-38.084847,145.483166,0,3085.5
74907,-38.058136,145.486220,0,3341.4


In [8]:
price_curr_df = price_df[price_df['year'] == 2024]
price_curr_df

Unnamed: 0,year,suburb,avg_property_price
23558,2024,ABBOTSFORD,4.452602e+05
23559,2024,ABERFELDIE,8.675000e+05
23560,2024,ADELAIDE LEAD,4.250000e+05
23561,2024,AIRPORT WEST,8.286422e+05
23562,2024,ALBANVALE,4.830952e+05
...,...,...,...
24696,2024,YEA,6.527045e+05
24697,2024,YEO,1.217000e+06
24698,2024,YINNAR,5.201429e+05
24699,2024,YINNAR SOUTH,8.200000e+05


In [9]:
curr_df = rental_curr_df.merge(proximity_curr_df, on=['lat', 'lng'], how='inner')
curr_df = curr_df.merge(price_curr_df, on=['suburb', 'year'], how='inner')
curr_df

Unnamed: 0,address,suburb,bed,bath,car,type,year,sa2_code,median_income,population,cpi,unemployment_rate,lat,lng,geometry,rented_price,city_index,time_city,avg_property_price
0,"201/560 LONSDALE STREET, MELBOURNE",MELBOURNE,2.0,2.0,1.0,Unit/apmt,2024,206041505,42430.858339,20162,3.53,4.014286,-37.813730,144.955580,POINT (144.95558001 -37.81373001),730,0,263.3,696152.566964
1,"901/21 HEALEYS LANE, MELBOURNE",MELBOURNE,2.0,1.0,0.0,Unit/apmt,2024,206041505,42430.858339,20162,3.53,4.014286,-37.813590,144.955720,POINT (144.95572001 -37.81359001),720,0,232.5,696152.566964
2,"3801/601 LITTLE LONSDALE STREET, MELBOURNE",MELBOURNE,2.0,2.0,1.0,Unit/apmt,2024,206041505,42430.858339,20162,3.53,4.014286,-37.813700,144.953930,POINT (144.95393001 -37.8137001),800,0,228.1,696152.566964
3,"1808/318 RUSSELL STREET, MELBOURNE",MELBOURNE,2.0,2.0,0.0,Unit/apmt,2024,206041503,42412.280478,11840,3.53,4.014286,-37.809259,144.966567,POINT (144.9665671 -37.809259),840,0,124.0,696152.566964
4,"707/77 QUEENS ROAD, MELBOURNE",MELBOURNE,2.0,2.0,1.0,Unit/apmt,2024,206051514,66910.824282,15041,3.53,4.014286,-37.852740,144.979740,POINT (144.97974001 -37.85274001),680,0,650.5,696152.566964
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87440,"7 ROSSETTI COURT, PAKENHAM",PAKENHAM,3.0,1.0,0.0,House,2024,212011552,55683.420715,28466,3.53,4.014286,-38.069069,145.463520,POINT (145.46352 -38.069069),520,0,3251.4,843219.858156
87441,"7 DISCOVERY STREET, PAKENHAM",PAKENHAM,3.0,2.0,2.0,House,2024,212011551,57772.670788,7044,3.53,4.014286,-38.078518,145.482727,POINT (145.482727 -38.078518),500,0,3126.4,843219.858156
87442,"7 DISCOVERY STREET, PAKENHAM",PAKENHAM,3.0,2.0,2.0,House,2024,212011551,57772.670788,7044,3.53,4.014286,-38.078518,145.482727,POINT (145.482727 -38.078518),500,0,3126.4,843219.858156
87443,"19 PORTILLO CRESCENT, PAKENHAM",PAKENHAM,4.0,2.0,2.0,Unit/apmt,2024,212011550,58924.808098,9820,3.53,4.014286,-38.052810,145.454962,POINT (145.4549617002 -38.0528099002),580,0,3297.1,843219.858156


In [10]:
# Join historical and current data
df = pd.concat([hist_df, curr_df], ignore_index=True)
df

Unnamed: 0,address,suburb,bed,bath,car,type,year,sa2_code,median_income,population,cpi,unemployment_rate,lat,lng,geometry,rented_price,city_index,time_city,avg_property_price
0,"201/560 LONSDALE STREET, MELBOURNE",MELBOURNE,2.0,2.0,1.0,Unit/apmt,2023,206041505,44492.400500,20027.0,5.60,3.691667,-37.813730,144.955580,POINT (144.95558001 -37.81373001),800,0,263.3,682488.770000
1,"201/560 LONSDALE STREET, MELBOURNE",MELBOURNE,2.0,2.0,1.0,Unit/apmt,2023,206041505,44492.400500,20027.0,5.60,3.691667,-37.813730,144.955580,POINT (144.95558001 -37.81373001),800,0,263.3,682488.770000
2,"201/560 LONSDALE STREET, MELBOURNE",MELBOURNE,2.0,2.0,1.0,Unit/apmt,2021,206041505,39300.000000,16098.0,3.50,4.200000,-37.813730,144.955580,POINT (144.95558001 -37.81373001),540,0,263.3,619543.745192
3,"1702/560 LONSDALE STREET, MELBOURNE",MELBOURNE,2.0,1.0,0.0,Unit/apmt,2023,206041505,44492.400500,20027.0,5.60,3.691667,-37.813730,144.955580,POINT (144.95558001 -37.81373001),720,0,263.3,682488.770000
4,"1702/560 LONSDALE STREET, MELBOURNE",MELBOURNE,2.0,1.0,0.0,Unit/apmt,2023,206041505,44492.400500,20027.0,5.60,3.691667,-37.813730,144.955580,POINT (144.95558001 -37.81373001),650,0,263.3,682488.770000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1318424,"7 ROSSETTI COURT, PAKENHAM",PAKENHAM,3.0,1.0,0.0,House,2024,212011552,55683.420715,28466.0,3.53,4.014286,-38.069069,145.463520,POINT (145.46352 -38.069069),520,0,3251.4,843219.858156
1318425,"7 DISCOVERY STREET, PAKENHAM",PAKENHAM,3.0,2.0,2.0,House,2024,212011551,57772.670788,7044.0,3.53,4.014286,-38.078518,145.482727,POINT (145.482727 -38.078518),500,0,3126.4,843219.858156
1318426,"7 DISCOVERY STREET, PAKENHAM",PAKENHAM,3.0,2.0,2.0,House,2024,212011551,57772.670788,7044.0,3.53,4.014286,-38.078518,145.482727,POINT (145.482727 -38.078518),500,0,3126.4,843219.858156
1318427,"19 PORTILLO CRESCENT, PAKENHAM",PAKENHAM,4.0,2.0,2.0,Unit/apmt,2024,212011550,58924.808098,9820.0,3.53,4.014286,-38.052810,145.454962,POINT (145.4549617002 -38.0528099002),580,0,3297.1,843219.858156


## Preparing Future Dataset (2029)

In [11]:
# Create dictionary for unique suburb as key and SA2 code as value
suburb_sa2_dict = \
    df[['suburb', 'sa2_code']] \
        .drop_duplicates(). \
            set_index('suburb') \
                .to_dict()['sa2_code']
suburb_sa2_dict

{'MELBOURNE': '213031347',
 'IVANHOE': '209011203',
 'DOCKLANDS': '206051130',
 'BEACONSFIELD': '205041094',
 'MOONEE PONDS': '206041504',
 'MILDURA': '210051445',
 'CRAIGIEBURN': '209041225',
 'SOUTH MORANG': '209041527',
 'BRIGHTON': '208011173',
 'TEMPLESTOWE LOWER': '207021156',
 'BRUNSWICK WEST': '206041124',
 'BAIRNSDALE': '207011149',
 'DONCASTER EAST': '211041270',
 'ST LEONARDS': '206031113',
 'HAWTHORN EAST': '207011154',
 'THORNBURY': '209021429',
 'AIRPORT WEST': '210051250',
 'HUGHESDALE': '216021413',
 'ST KILDA EAST': '217031476',
 'CARLTON NORTH': '217011421',
 'SALE': '205051102',
 'ROWVILLE': '211011448',
 'ORMOND': '208021174',
 'DARNUM': '205011079',
 'BURWOOD EAST': '207031161',
 'HAZELWOOD': '205041094',
 'CARLTON': '207011519',
 'MACLEOD': '212021295',
 'WODONGA': '210011533',
 'TARNEIT': '203031051',
 'HOPPERS CROSSING': '213051586',
 'LYNDHURST': '203021044',
 'SOUTH YARRA': '211051286',
 'POINT COOK': '208021174',
 'DONVALE': '212041317',
 'KEW': '205041493',


In [12]:
df['type'].unique()

array(['Unit/apmt', 'House'], dtype=object)

In [13]:
future_df = df[[
    'suburb', 'type', 'sa2_code', 'bed', 'bath', 'car', 'time_city',
    'avg_property_price', 'rented_price'
]]
future_df

Unnamed: 0,suburb,type,sa2_code,bed,bath,car,time_city,avg_property_price,rented_price
0,MELBOURNE,Unit/apmt,206041505,2.0,2.0,1.0,263.3,682488.770000,800
1,MELBOURNE,Unit/apmt,206041505,2.0,2.0,1.0,263.3,682488.770000,800
2,MELBOURNE,Unit/apmt,206041505,2.0,2.0,1.0,263.3,619543.745192,540
3,MELBOURNE,Unit/apmt,206041505,2.0,1.0,0.0,263.3,682488.770000,720
4,MELBOURNE,Unit/apmt,206041505,2.0,1.0,0.0,263.3,682488.770000,650
...,...,...,...,...,...,...,...,...,...
1318424,PAKENHAM,House,212011552,3.0,1.0,0.0,3251.4,843219.858156,520
1318425,PAKENHAM,House,212011551,3.0,2.0,2.0,3126.4,843219.858156,500
1318426,PAKENHAM,House,212011551,3.0,2.0,2.0,3126.4,843219.858156,500
1318427,PAKENHAM,Unit/apmt,212011550,4.0,2.0,2.0,3297.1,843219.858156,580


In [14]:
# group by suburb, type, and sa2_code - aggregate by average for the rest
future_df = future_df.groupby([
    'suburb', 'type', 'sa2_code'
]).agg({
    'bed': 'mean',
    'bath': 'mean',
    'car': 'mean',
    'time_city': 'mean',
    'avg_property_price': 'mean',
    'rented_price': 'mean'
}).reset_index()
future_df

Unnamed: 0,suburb,type,sa2_code,bed,bath,car,time_city,avg_property_price,rented_price
0,ABBOTSFORD,House,206071139,2.336884,1.288948,0.708389,584.456458,5.635304e+05,629.098535
1,ABBOTSFORD,House,206071517,2.000000,1.000000,2.000000,598.700000,6.325728e+05,642.000000
2,ABBOTSFORD,House,206071518,1.333333,1.000000,0.833333,612.500000,5.748636e+05,476.666667
3,ABBOTSFORD,House,207011521,3.000000,2.000000,1.000000,923.200000,4.848551e+05,720.000000
4,ABBOTSFORD,Unit/apmt,203021047,1.000000,1.000000,0.000000,475.100000,6.516417e+05,366.666667
...,...,...,...,...,...,...,...,...,...
4964,YINNAR,House,205031090,4.000000,2.000000,4.000000,7123.600000,6.017227e+05,550.000000
4965,YINNAR,House,205041094,3.328358,1.701493,1.910448,7307.419403,5.062333e+05,392.462687
4966,YINNAR,Unit/apmt,205041094,2.805556,1.361111,2.277778,7308.608333,4.844415e+05,337.083333
4967,YINNAR SOUTH,House,205041094,3.666667,2.000000,2.666667,7742.800000,9.439556e+05,440.000000


In [18]:
# import future predictions data

# CPI
future_cpi = pd.read_csv('../data/raw/predictions/cpi_data.csv')
future_cpi_2029 = float(
    future_cpi[future_cpi['year'] == 2029]['Annual change (%)'] \
    .iloc[0]
)

# Unemployment rate
future_unemployment = pd.read_csv('../data/raw/predictions/yearly_avg_unemployment.csv')
future_unemployment_2029 = float(
    future_unemployment[
        future_unemployment['year'] == 2029
    ]['Yearly Average Seasonally Adjusted (%)'] \
    .iloc[0]

)

# Median income
future_income = pd.read_csv('../data/raw/predictions/predicted_median_income.csv')
future_income_2029_df = future_income[
    future_income['year'] == 2029
][['sa2_code', 'median_income']]
future_income_2029_df['sa2_code'] = future_income_2029_df['sa2_code'].astype(str)

# Population
future_population = pd.read_csv('../data/raw/predictions/population_prediction.csv')
future_population_2029_df = future_population[
    future_population['year'] == 2029
][['Statistical Areas Level 2 2021 code', 'Projected persons (total)']]
future_population_2029_df = future_population_2029_df.rename(columns={
    'Statistical Areas Level 2 2021 code': 'sa2_code',
    'Projected persons (total)': 'population'
})
future_population_2029_df['sa2_code'] = future_population_2029_df['sa2_code'].astype(str)

# merge future predictions data
future_df_merge = future_df.merge(future_income_2029_df, on='sa2_code', how='inner')
future_df_merge = future_df_merge.merge(future_population_2029_df, on='sa2_code', how='inner')
future_df_merge['cpi'] = future_cpi_2029
future_df_merge['unemployment_rate'] = future_unemployment_2029
future_df_merge

Unnamed: 0,suburb,type,sa2_code,bed,bath,car,time_city,avg_property_price,rented_price,median_income,population,cpi,unemployment_rate
0,ABBOTSFORD,House,206071139,2.336884,1.288948,0.708389,584.456458,5.635304e+05,629.098535,91257.253099,11728,2.5,4.59
1,ABBOTSFORD,House,206071517,2.000000,1.000000,2.000000,598.700000,6.325728e+05,642.000000,98618.500273,19800,2.5,4.59
2,ABBOTSFORD,House,206071518,1.333333,1.000000,0.833333,612.500000,5.748636e+05,476.666667,106468.023481,17087,2.5,4.59
3,ABBOTSFORD,House,207011521,3.000000,2.000000,1.000000,923.200000,4.848551e+05,720.000000,89531.438200,11798,2.5,4.59
4,ABBOTSFORD,Unit/apmt,203021047,1.000000,1.000000,0.000000,475.100000,6.516417e+05,366.666667,70447.277529,16323,2.5,4.59
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4964,YINNAR,House,205031090,4.000000,2.000000,4.000000,7123.600000,6.017227e+05,550.000000,59549.916415,12427,2.5,4.59
4965,YINNAR,House,205041094,3.328358,1.701493,1.910448,7307.419403,5.062333e+05,392.462687,64001.329257,12617,2.5,4.59
4966,YINNAR,Unit/apmt,205041094,2.805556,1.361111,2.277778,7308.608333,4.844415e+05,337.083333,64001.329257,12617,2.5,4.59
4967,YINNAR SOUTH,House,205041094,3.666667,2.000000,2.666667,7742.800000,9.439556e+05,440.000000,64001.329257,12617,2.5,4.59


## Output to CSVs

In [19]:
output = False
if output:
    create_dir('../data/curated')
    # df.to_csv('../data/curated/rental-17-24.csv', index=False)
    future_df_merge.to_csv('../data/curated/suburb-rental-29.csv', index=False)
    with open('../data/curated/suburb_to_sa2.json', 'w') as f:
        json.dump(suburb_sa2_dict, f)

Directory already exists: ../data/curated

