Summary of Key Steps:
Load and Clean Data: Ensure you have clean, time-indexed data for Median_Price, Population, and Income.
Transform to Long Format: Convert data into a long format suitable for time series analysis, and align it by Date.
Train-Test Split: Split the data into train and forecast sets based on a cutoff date (e.g., March 2022).
Fit AutoARIMA: Use AutoARIMA to model Median_Price, with population and income as exogenous variables.
Forecast Future Quarters: Predict future rental prices based on the exogenous population and income data for 2022-2024.
Analyze Results: Store the forecast and calculate potential growth rates if required.

In [70]:
import pandas as pd
import numpy as np

In [68]:
df = pd.read_csv('../../data/curated/AllProperties.csv')
df.loc[df['Suburb'] == 'Wanagaratta', 'Suburb'] = 'Wangaratta'

In [71]:
df.columns = df.columns.str.strip()

years = [2018, 2019, 2020, 2021, 2022, 2023]
df_weekly_rent = df[['Suburb']].copy()

for year in years:
    year_cols = [col for col in df.columns if col.startswith(str(year))]
    df[year_cols] = df[year_cols].apply(pd.to_numeric, errors='coerce')

    df_temp = df[year_cols].replace(0, np.nan)
    avg_without_zeros = df_temp.mean(axis=1, skipna=True)

    all_zero_mask = (df[year_cols] == 0).all(axis=1)
    column_name = f"{year}_weekly_rent"
    df_weekly_rent[column_name] = np.where(all_zero_mask, 0, avg_without_zeros)

df_weekly_rent

Unnamed: 0,Suburb,2018_weekly_rent,2019_weekly_rent,2020_weekly_rent,2021_weekly_rent,2022_weekly_rent,2023_weekly_rent
0,Albert Park-Middle Park-West St Kilda,737.375,720.875,702.750,711.250,684.375,665.5
1,Armadale,702.125,624.000,621.500,628.000,642.250,620.5
2,Carlton North,535.625,537.875,564.875,587.625,573.000,555.0
3,Carlton-Parkville,2418.875,2458.875,2663.875,2615.250,3372.250,3545.0
4,CBD-St Kilda Rd,5352.125,5629.750,6095.625,8814.125,9021.500,8963.0
...,...,...,...,...,...,...,...
141,Traralgon,691.375,621.500,594.375,593.750,633.500,644.0
142,Wangaratta,451.875,460.375,473.125,450.250,457.375,449.0
143,Warragul,398.875,420.625,465.875,459.750,465.250,468.5
144,Warrnambool,751.125,695.750,663.000,614.750,633.375,643.5


In [72]:
df_new = df_weekly_rent.copy()

population_years = [2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025, 2026]
for year in population_years:
    
    # initialise population columns
    population_column_name = f"{year}_population"
    df_new[population_column_name] = 0
    
    # initialise income columns
    income_column_name = f"{year}_income"
    df_new[income_column_name] = 0.0

df_new

Unnamed: 0,Suburb,2018_weekly_rent,2019_weekly_rent,2020_weekly_rent,2021_weekly_rent,2022_weekly_rent,2023_weekly_rent,2018_population,2018_income,2019_population,...,2022_population,2022_income,2023_population,2023_income,2024_population,2024_income,2025_population,2025_income,2026_population,2026_income
0,Albert Park-Middle Park-West St Kilda,737.375,720.875,702.750,711.250,684.375,665.5,0,0.0,0,...,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0
1,Armadale,702.125,624.000,621.500,628.000,642.250,620.5,0,0.0,0,...,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0
2,Carlton North,535.625,537.875,564.875,587.625,573.000,555.0,0,0.0,0,...,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0
3,Carlton-Parkville,2418.875,2458.875,2663.875,2615.250,3372.250,3545.0,0,0.0,0,...,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0
4,CBD-St Kilda Rd,5352.125,5629.750,6095.625,8814.125,9021.500,8963.0,0,0.0,0,...,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
141,Traralgon,691.375,621.500,594.375,593.750,633.500,644.0,0,0.0,0,...,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0
142,Wangaratta,451.875,460.375,473.125,450.250,457.375,449.0,0,0.0,0,...,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0
143,Warragul,398.875,420.625,465.875,459.750,465.250,468.5,0,0.0,0,...,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0
144,Warrnambool,751.125,695.750,663.000,614.750,633.375,643.5,0,0.0,0,...,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0


In [73]:
df_population = pd.read_csv('../../data/curated/population_by_sa2.csv')
df_income = pd.read_csv('../../data/curated/income_by_sa2.csv')

In [75]:
with open('../../notebooks/Julian/missing_suburbs.txt') as f:
    missing_suburbs = f.read().splitlines()

In [142]:
suburb_count = {}

for label in df_income.Label:
    if label in missing_suburbs:
        continue
    found = False
    for suburb in df_weekly_rent.Suburb.values:
        if label == 'Carlton' and suburb == 'Carlton-Parkville' or \
        label == 'Keilor' and suburb == 'Keilor' or \
        label == 'Pascoe Vale' and suburb == 'Pascoe Vale-Coburg North' or \
        label == 'Bendigo Surrounds - North' and suburb == 'North Bendigo' or \
        label == 'East Bendigo - Kennington' and suburb == 'Flora Hill-Bendigo East' or \
        label == 'Brunswick East' and suburb == 'East Brunswick' or \
        label == 'Brunswick West' and suburb == 'West Brunswick' or \
        label == 'Carlton North - Princes Hill' and suburb == 'Carlton North' or \
        label == 'Dandenong - North' and suburb == 'Dandenong North-Endeavour Hills' or \
        label == 'Dandenong - South' and suburb == 'Dandenong' or \
        label == 'Gladstone Park - Westmeadows' and suburb == 'Gladstone Park-Tullamarine' or \
        label == 'Hawthorn East' and suburb == 'East Hawthorn' or \
        label == 'Highett (West) - Cheltenham' and suburb == 'Cheltenham' or \
        label == 'Ivanhoe East - Eaglemont' and suburb == 'Ivanhoe-Ivanhoe East' or \
        label == 'North Geelong - Bell Park' and suburb == 'North Geelong' or \
        label == 'Research - North Warrandyte' and suburb == 'Eltham-Research-Montmorency' or \
        label == 'Geelong West - Hamlyn Heights' and suburb == 'Herne Hill-Geelong West' or \
        label == 'St Kilda - Central' and suburb == 'St Kilda' or \
        label == 'St Kilda - West' and suburb == 'Albert Park-Middle Park-West St Kilda' or \
        label == 'St Kilda East' and suburb == 'East St Kilda' or \
        label == 'West Footscray - Tottenham' and suburb == 'West Footscray' or \
        'Berwick' in label and 'Berwick' in suburb or \
        'Bundoora' in label and 'Bundoora' in suburb or \
        'Clayton' in label and 'Clayton' in suburb or \
        'Craigieburn' in label and 'Craigieburn' in suburb or \
        'Croydon' in label and 'Croydon' in suburb or \
        'Doncaster East' in label and 'Doncaster East' in suburb or \
        'Endeavour Hills' in label and 'Endeavour Hills' in suburb or \
        'Epping' in label and 'Epping' in suburb or \
        'Essendon' in label and 'Essendon' in suburb or \
        'Ferntree Gully' in label and 'Ferntree Gully' in suburb or \
        'Glen Waverley' in label and 'Glen Waverley' in suburb or \
        'Glenroy' in label and 'Glenroy' in suburb or \
        'Hampton Park' in label and 'Hampton Park' in suburb or \
        'Hoppers Crossing' in label and 'Hoppers Crossing' in suburb or \
        'Kew' in label and 'Kew' in suburb or \
        'Lalor' in label and 'Lalor' in suburb or \
        'CBD' in label and 'CBD' in suburb or \
        'Mildura' in label and 'Mildura' in suburb or \
        'Mill Park' in label and 'Mill Park' in suburb or \
        'Mornington' in label and 'Mornington' in suburb or \
        'Mount Waverley' in label and 'Mount Waverley' in suburb or \
        'Narre Warren' in label and 'Narre Warren' in suburb or \
        'Noble Park' in label and 'Noble Park' in suburb or \
        'Northcote' in label and 'Northcote' in suburb or \
        'Pakenham' in label and 'Pakenham' in suburb or \
        'Preston' in label and 'Preston' in suburb or \
        'Reservoir' in label and 'Reservoir' in suburb or \
        'Richmond' in label and 'Richmond' in suburb or \
        'Rowville' in label and 'Rowville' in suburb or \
        'Roxburgh' in label and 'Roxburgh' in suburb or \
        'Shepparton' in label and 'Shepparton' in suburb or \
        'South Yarra' in label and 'South Yarra' in suburb or \
        'Southbank' in label and 'Southbank' in suburb or \
        'Albans' in label and 'Albans' in suburb or \
        'Surrey' in label and 'Surrey' in suburb or \
        'Traralgon' in label and 'Traralgon' in suburb or \
        'Warrnambool' in label and 'Warrnambool' in suburb or \
        'Werribee' in label and 'Werribee' in suburb or \
        'West Melbourne' in label and 'West Melbourne' in suburb or \
        label in suburb:
            found = True
            if suburb not in suburb_count:
                suburb_count[suburb] = 1
            else:
                suburb_count[suburb] += 1

            # population
            df_new.loc[df_new['Suburb'] == suburb, '2018_population'] += int(df_population.loc[df_population['Label'] == label, 'estimated_population_2018'].values[0])
            df_new.loc[df_new['Suburb'] == suburb, '2019_population'] += int(df_population.loc[df_population['Label'] == label, 'estimated_population_2019'].values[0])
            df_new.loc[df_new['Suburb'] == suburb, '2020_population'] += int(df_population.loc[df_population['Label'] == label, 'estimated_population_2020'].values[0])
            df_new.loc[df_new['Suburb'] == suburb, '2021_population'] += int(df_population.loc[df_population['Label'] == label, 'estimated_population_2021'].values[0])
            df_new.loc[df_new['Suburb'] == suburb, '2022_population'] += int(df_population.loc[df_population['Label'] == label, 'estimated_population_2022'].values[0])
            df_new.loc[df_new['Suburb'] == suburb, '2023_population'] += int(df_population.loc[df_population['Label'] == label, 'estimated_population_2023'].values[0])
            df_new.loc[df_new['Suburb'] == suburb, '2024_population'] += int(df_population.loc[df_population['Label'] == label, 'estimated_population_2024'].values[0])
            df_new.loc[df_new['Suburb'] == suburb, '2025_population'] += int(df_population.loc[df_population['Label'] == label, 'estimated_population_2025'].values[0])
            df_new.loc[df_new['Suburb'] == suburb, '2026_population'] += int(df_population.loc[df_population['Label'] == label, 'estimated_population_2026'].values[0])

            # income
            df_new.loc[df_new['Suburb'] == suburb, '2018_income'] += float(df_income.loc[df_income['Label'] == label, 'median_income_2018'].values[0])
            df_new.loc[df_new['Suburb'] == suburb, '2019_income'] += float(df_income.loc[df_income['Label'] == label, 'median_income_2019'].values[0])
            df_new.loc[df_new['Suburb'] == suburb, '2020_income'] += float(df_income.loc[df_income['Label'] == label, 'median_income_2020'].values[0])
            df_new.loc[df_new['Suburb'] == suburb, '2021_income'] += float(df_income.loc[df_income['Label'] == label, 'median_income_2021'].values[0])
            df_new.loc[df_new['Suburb'] == suburb, '2022_income'] += float(df_income.loc[df_income['Label'] == label, 'median_income_2022'].values[0])
            df_new.loc[df_new['Suburb'] == suburb, '2023_income'] += float(df_income.loc[df_income['Label'] == label, 'median_income_2023'].values[0])
            df_new.loc[df_new['Suburb'] == suburb, '2024_income'] += float(df_income.loc[df_income['Label'] == label, 'median_income_2024'].values[0])
            df_new.loc[df_new['Suburb'] == suburb, '2025_income'] += float(df_income.loc[df_income['Label'] == label, 'median_income_2025'].values[0])
            df_new.loc[df_new['Suburb'] == suburb, '2026_income'] += float(df_income.loc[df_income['Label'] == label, 'median_income_2026'].values[0])

# average median income for suburbs with multiple SA2s
for suburb in suburb_count:
    if suburb_count[suburb] > 1:
        df_new.loc[df_new['Suburb'] == suburb, '2018_income'] /= suburb_count[suburb]
        df_new.loc[df_new['Suburb'] == suburb, '2019_income'] /= suburb_count[suburb]
        df_new.loc[df_new['Suburb'] == suburb, '2020_income'] /= suburb_count[suburb]
        df_new.loc[df_new['Suburb'] == suburb, '2021_income'] /= suburb_count[suburb]
        df_new.loc[df_new['Suburb'] == suburb, '2022_income'] /= suburb_count[suburb]
        df_new.loc[df_new['Suburb'] == suburb, '2023_income'] /= suburb_count[suburb]
        df_new.loc[df_new['Suburb'] == suburb, '2024_income'] /= suburb_count[suburb]
        df_new.loc[df_new['Suburb'] == suburb, '2025_income'] /= suburb_count[suburb]
        df_new.loc[df_new['Suburb'] == suburb, '2026_income'] /= suburb_count[suburb]

In [143]:
df_new = df_new[['Suburb'] + sorted(df_new.columns.drop('Suburb'))]
df_new = df_new[(df_new != 0).all(axis=1)].reset_index(drop=True)
df_new

Unnamed: 0,Suburb,2018_income,2018_population,2018_weekly_rent,2019_income,2019_population,2019_weekly_rent,2020_income,2020_population,2020_weekly_rent,...,2023_income,2023_population,2023_weekly_rent,2024_income,2024_population,2025_income,2025_population,2026_income,2026_population,growth_2023_to_2026
0,Albert Park-Middle Park-West St Kilda,129800.0,33456,737.375,131760.00,34162,720.875,137672.00,33910,702.750,...,154001.839067,33722,665.5,160479.843039,35330,166111.185176,36560,172662.990452,37412,
1,Armadale,109934.0,176136,702.125,113652.00,183086,624.000,116874.00,189940,621.500,...,131366.552557,210188,620.5,136751.533773,218556,142365.547223,225618,147771.490741,231306,
2,Carlton North,64101.0,59712,535.625,66717.00,60046,537.875,68578.50,59528,564.875,...,77976.916258,59814,555.0,81521.695868,67978,85045.780382,73498,88264.445937,76382,
3,Carlton-Parkville,54645.0,58146,2418.875,57101.25,58640,2458.875,59135.25,58366,2663.875,...,67445.004824,58758,3545.0,70493.198502,67120,73587.563542,72666,76555.712284,75506,
4,Collingwood-Abbotsford,88390.5,38264,1167.375,92448.75,38846,1216.875,98025.75,39282,1247.750,...,111147.563167,40986,1341.5,115840.981918,43742,120059.859012,45752,124898.634576,47074,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
109,Swan Hill,88630.0,22432,317.250,89936.00,22528,315.500,92604.00,22556,297.500,...,104658.888178,21896,309.5,109669.189559,21962,114016.108059,22232,118366.929530,22618,
110,Torquay,102952.0,41992,538.875,107974.00,44298,571.500,113276.00,46692,617.875,...,127921.152597,52068,583.5,132931.555205,53026,138225.740832,54064,143904.480880,55214,
111,Wangaratta,92430.0,38942,451.875,95950.00,39248,460.375,98564.00,39432,473.125,...,111394.922824,39938,449.0,116156.500109,40068,121125.169329,40454,125766.485112,41066,
112,Warragul,93390.0,41002,398.875,97486.00,42566,420.625,100982.00,44198,465.875,...,114068.372851,48834,468.5,118711.317378,50006,123707.916330,51172,128638.814675,52326,


In [103]:
!pip install pmdarima

2867.69s - pydevd: Sending message related to process being replaced timed-out after 5 seconds




# Modelling

In [116]:
suburbs = df_new['Suburb'].unique()
len(suburbs)

114

In [132]:
from pmdarima import auto_arima
import pandas as pd
import numpy as np

forecast_results = pd.DataFrame()

# we have 114 unique suburbs
suburbs = df_new['Suburb'].unique()

for suburb in suburbs:
    # take specific suburb
    suburb_data = df_new[df_new['Suburb'] == suburb]
    
    # target variable (y) - the weekly rent time series
    y_train = suburb_data[['2018_weekly_rent', '2019_weekly_rent', '2020_weekly_rent', 
                           '2021_weekly_rent', '2022_weekly_rent', '2023_weekly_rent']].values.flatten()
    
     # exogenous variables (population and income over time)
    exogenous_train = suburb_data[['2018_population', '2019_population', '2020_population', 
                                   '2021_population', '2022_population', '2023_population']].values.flatten()
    exogenous_income = suburb_data[['2018_income', '2019_income', '2020_income', 
                                    '2021_income', '2022_income', '2023_income']].values.flatten()
    
    # stack population and income into a single matrix 
    exogenous_vars = np.column_stack([exogenous_train, exogenous_income])

    # error handling 
    if len(y_train) != exogenous_vars.shape[0]:
        print(f"Length mismatch for {suburb}: y_train length = {len(y_train)}, exogenous_vars rows = {exogenous_vars.shape[0]}")
        continue  

    try:
        # test without seasonal parameter since we are doing per year
        model = auto_arima(y_train, exogenous=exogenous_vars, seasonal=False, trace=True, suppress_warnings=True)
    except ValueError as e:
        print(f"Error fitting ARIMA for {suburb}: {e}")
        continue  
    
    #  future exogenous variables from 2024-2026
    exogenous_future = suburb_data[['2024_population', '2025_population', '2026_population',
                                    '2024_income', '2025_income', '2026_income']].values.flatten()

    # forecasts
    forecast_horizon = 3  # For 2024-2026
    forecast = model.predict(n_periods=forecast_horizon, exogenous=exogenous_future)
    
    # Store the forecast
    forecast_results[suburb] = forecast






Performing stepwise search to minimize aic
 ARIMA(1,0,1)(0,0,0)[0]             : AIC=65.124, Time=0.10 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=97.710, Time=0.00 sec
 ARIMA(1,0,0)(0,0,0)[0]             : AIC=inf, Time=0.02 sec
 ARIMA(0,0,1)(0,0,0)[0]             : AIC=inf, Time=0.04 sec
 ARIMA(2,0,1)(0,0,0)[0]             : AIC=67.397, Time=0.10 sec
 ARIMA(1,0,2)(0,0,0)[0]             : AIC=67.122, Time=0.04 sec
 ARIMA(0,0,2)(0,0,0)[0]             : AIC=inf, Time=0.04 sec
 ARIMA(2,0,0)(0,0,0)[0]             : AIC=inf, Time=0.03 sec
 ARIMA(2,0,2)(0,0,0)[0]             : AIC=inf, Time=0.10 sec
 ARIMA(1,0,1)(0,0,0)[0] intercept   : AIC=inf, Time=0.10 sec

Best model:  ARIMA(1,0,1)(0,0,0)[0]          
Total fit time: 0.610 seconds
Performing stepwise search to minimize aic
 ARIMA(1,0,1)(0,0,0)[0]             : AIC=72.055, Time=0.06 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=96.572, Time=0.00 sec
 ARIMA(1,0,0)(0,0,0)[0]             : AIC=inf, Time=0.01 sec
 ARIMA(0,0,1)(0,0,0)[0]   

  forecast_results[suburb] = forecast
  forecast_results[suburb] = forecast


 ARIMA(1,0,1)(0,0,0)[0]             : AIC=inf, Time=0.03 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=90.267, Time=0.00 sec
 ARIMA(1,0,0)(0,0,0)[0]             : AIC=inf, Time=0.01 sec
 ARIMA(0,0,1)(0,0,0)[0]             : AIC=inf, Time=0.01 sec
 ARIMA(0,0,0)(0,0,0)[0] intercept   : AIC=49.326, Time=0.00 sec
 ARIMA(1,0,0)(0,0,0)[0] intercept   : AIC=50.815, Time=0.02 sec
 ARIMA(0,0,1)(0,0,0)[0] intercept   : AIC=inf, Time=0.03 sec
 ARIMA(1,0,1)(0,0,0)[0] intercept   : AIC=inf, Time=0.08 sec

Best model:  ARIMA(0,0,0)(0,0,0)[0] intercept
Total fit time: 0.198 seconds
Performing stepwise search to minimize aic
 ARIMA(1,0,1)(0,0,0)[0]             : AIC=62.999, Time=0.03 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=92.785, Time=0.00 sec
 ARIMA(1,0,0)(0,0,0)[0]             : AIC=inf, Time=0.02 sec
 ARIMA(0,0,1)(0,0,0)[0]             : AIC=inf, Time=0.02 sec
 ARIMA(2,0,1)(0,0,0)[0]             : AIC=64.941, Time=0.07 sec
 ARIMA(1,0,2)(0,0,0)[0]             : AIC=64.424, Time=0.06 sec
 ARIM

  forecast_results[suburb] = forecast


 ARIMA(1,0,2)(0,0,0)[0]             : AIC=inf, Time=0.07 sec
 ARIMA(0,0,2)(0,0,0)[0]             : AIC=inf, Time=0.04 sec
 ARIMA(2,0,0)(0,0,0)[0]             : AIC=inf, Time=0.02 sec
 ARIMA(2,0,2)(0,0,0)[0]             : AIC=inf, Time=0.08 sec
 ARIMA(1,0,1)(0,0,0)[0] intercept   : AIC=67.898, Time=0.10 sec
 ARIMA(0,0,1)(0,0,0)[0] intercept   : AIC=64.755, Time=0.04 sec
 ARIMA(0,0,0)(0,0,0)[0] intercept   : AIC=64.025, Time=0.00 sec
 ARIMA(1,0,0)(0,0,0)[0] intercept   : AIC=64.129, Time=0.06 sec

Best model:  ARIMA(0,0,0)(0,0,0)[0] intercept
Total fit time: 0.554 seconds
Performing stepwise search to minimize aic
 ARIMA(1,0,1)(0,0,0)[0]             : AIC=69.288, Time=0.03 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=90.977, Time=0.00 sec
 ARIMA(1,0,0)(0,0,0)[0]             : AIC=inf, Time=0.01 sec
 ARIMA(0,0,1)(0,0,0)[0]             : AIC=inf, Time=0.01 sec


  forecast_results[suburb] = forecast


 ARIMA(2,0,1)(0,0,0)[0]             : AIC=71.333, Time=0.08 sec
 ARIMA(1,0,2)(0,0,0)[0]             : AIC=70.543, Time=0.08 sec
 ARIMA(0,0,2)(0,0,0)[0]             : AIC=inf, Time=0.03 sec
 ARIMA(2,0,0)(0,0,0)[0]             : AIC=inf, Time=0.03 sec
 ARIMA(2,0,2)(0,0,0)[0]             : AIC=inf, Time=0.07 sec
 ARIMA(1,0,1)(0,0,0)[0] intercept   : AIC=inf, Time=0.08 sec

Best model:  ARIMA(1,0,1)(0,0,0)[0]          
Total fit time: 0.428 seconds
Performing stepwise search to minimize aic
 ARIMA(1,0,1)(0,0,0)[0]             : AIC=61.678, Time=0.04 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=87.984, Time=0.00 sec
 ARIMA(1,0,0)(0,0,0)[0]             : AIC=inf, Time=0.02 sec
 ARIMA(0,0,1)(0,0,0)[0]             : AIC=inf, Time=0.01 sec


  forecast_results[suburb] = forecast


 ARIMA(2,0,1)(0,0,0)[0]             : AIC=64.119, Time=0.09 sec
 ARIMA(1,0,2)(0,0,0)[0]             : AIC=inf, Time=0.07 sec
 ARIMA(0,0,2)(0,0,0)[0]             : AIC=inf, Time=0.05 sec
 ARIMA(2,0,0)(0,0,0)[0]             : AIC=inf, Time=0.04 sec
 ARIMA(2,0,2)(0,0,0)[0]             : AIC=60.271, Time=0.10 sec
 ARIMA(2,0,2)(0,0,0)[0] intercept   : AIC=inf, Time=0.11 sec

Best model:  ARIMA(2,0,2)(0,0,0)[0]          
Total fit time: 0.545 seconds
Performing stepwise search to minimize aic
 ARIMA(1,0,1)(0,0,0)[0]             : AIC=68.490, Time=0.03 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=95.751, Time=0.00 sec
 ARIMA(1,0,0)(0,0,0)[0]             : AIC=inf, Time=0.01 sec
 ARIMA(0,0,1)(0,0,0)[0]             : AIC=inf, Time=0.03 sec


  forecast_results[suburb] = forecast


 ARIMA(2,0,1)(0,0,0)[0]             : AIC=70.868, Time=0.08 sec
 ARIMA(1,0,2)(0,0,0)[0]             : AIC=inf, Time=0.07 sec
 ARIMA(0,0,2)(0,0,0)[0]             : AIC=inf, Time=0.03 sec
 ARIMA(2,0,0)(0,0,0)[0]             : AIC=inf, Time=0.03 sec
 ARIMA(2,0,2)(0,0,0)[0]             : AIC=72.452, Time=0.05 sec
 ARIMA(1,0,1)(0,0,0)[0] intercept   : AIC=inf, Time=0.10 sec

Best model:  ARIMA(1,0,1)(0,0,0)[0]          
Total fit time: 0.446 seconds
Performing stepwise search to minimize aic
 ARIMA(1,0,1)(0,0,0)[0]             : AIC=52.853, Time=0.04 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=84.897, Time=0.00 sec
 ARIMA(1,0,0)(0,0,0)[0]             : AIC=inf, Time=0.03 sec
 ARIMA(0,0,1)(0,0,0)[0]             : AIC=inf, Time=0.04 sec


  forecast_results[suburb] = forecast


 ARIMA(2,0,1)(0,0,0)[0]             : AIC=55.078, Time=0.09 sec
 ARIMA(1,0,2)(0,0,0)[0]             : AIC=54.371, Time=0.05 sec
 ARIMA(0,0,2)(0,0,0)[0]             : AIC=inf, Time=0.04 sec
 ARIMA(2,0,0)(0,0,0)[0]             : AIC=inf, Time=0.04 sec
 ARIMA(2,0,2)(0,0,0)[0]             : AIC=inf, Time=0.08 sec
 ARIMA(1,0,1)(0,0,0)[0] intercept   : AIC=52.018, Time=0.08 sec
 ARIMA(0,0,1)(0,0,0)[0] intercept   : AIC=inf, Time=0.03 sec
 ARIMA(1,0,0)(0,0,0)[0] intercept   : AIC=46.708, Time=0.02 sec
 ARIMA(0,0,0)(0,0,0)[0] intercept   : AIC=47.731, Time=0.00 sec
 ARIMA(2,0,0)(0,0,0)[0] intercept   : AIC=48.039, Time=0.06 sec
 ARIMA(2,0,1)(0,0,0)[0] intercept   : AIC=52.629, Time=0.09 sec

Best model:  ARIMA(1,0,0)(0,0,0)[0] intercept
Total fit time: 0.705 seconds
Performing stepwise search to minimize aic
 ARIMA(1,0,1)(0,0,0)[0]             : AIC=74.110, Time=0.05 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=100.681, Time=0.00 sec
 ARIMA(1,0,0)(0,0,0)[0]             : AIC=inf, Time=0.01 se

  forecast_results[suburb] = forecast


 ARIMA(2,0,1)(0,0,0)[0]             : AIC=inf, Time=0.15 sec
 ARIMA(1,0,2)(0,0,0)[0]             : AIC=inf, Time=0.07 sec
 ARIMA(0,0,2)(0,0,0)[0]             : AIC=inf, Time=0.04 sec
 ARIMA(2,0,0)(0,0,0)[0]             : AIC=inf, Time=0.03 sec
 ARIMA(2,0,2)(0,0,0)[0]             : AIC=inf, Time=0.17 sec
 ARIMA(1,0,1)(0,0,0)[0] intercept   : AIC=69.382, Time=0.10 sec
 ARIMA(0,0,1)(0,0,0)[0] intercept   : AIC=67.819, Time=0.03 sec
 ARIMA(0,0,0)(0,0,0)[0] intercept   : AIC=65.987, Time=0.00 sec
 ARIMA(1,0,0)(0,0,0)[0] intercept   : AIC=67.538, Time=0.02 sec

Best model:  ARIMA(0,0,0)(0,0,0)[0] intercept
Total fit time: 0.716 seconds
Performing stepwise search to minimize aic


  forecast_results[suburb] = forecast


 ARIMA(1,0,1)(0,0,0)[0]             : AIC=58.096, Time=0.07 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=87.959, Time=0.00 sec
 ARIMA(1,0,0)(0,0,0)[0]             : AIC=inf, Time=0.01 sec
 ARIMA(0,0,1)(0,0,0)[0]             : AIC=inf, Time=0.02 sec
 ARIMA(2,0,1)(0,0,0)[0]             : AIC=61.987, Time=0.07 sec
 ARIMA(1,0,2)(0,0,0)[0]             : AIC=inf, Time=0.07 sec
 ARIMA(0,0,2)(0,0,0)[0]             : AIC=inf, Time=0.05 sec
 ARIMA(2,0,0)(0,0,0)[0]             : AIC=inf, Time=0.03 sec
 ARIMA(2,0,2)(0,0,0)[0]             : AIC=inf, Time=0.08 sec
 ARIMA(1,0,1)(0,0,0)[0] intercept   : AIC=inf, Time=0.24 sec

Best model:  ARIMA(1,0,1)(0,0,0)[0]          
Total fit time: 0.655 seconds
Performing stepwise search to minimize aic
 ARIMA(1,0,1)(0,0,0)[0]             : AIC=69.722, Time=0.04 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=95.349, Time=0.00 sec
 ARIMA(1,0,0)(0,0,0)[0]             : AIC=inf, Time=0.02 sec
 ARIMA(0,0,1)(0,0,0)[0]             : AIC=inf, Time=0.02 sec
 ARIMA(2,0,

  forecast_results[suburb] = forecast


 ARIMA(1,0,2)(0,0,0)[0]             : AIC=inf, Time=0.13 sec
 ARIMA(0,0,2)(0,0,0)[0]             : AIC=inf, Time=0.04 sec
 ARIMA(2,0,0)(0,0,0)[0]             : AIC=inf, Time=0.03 sec
 ARIMA(2,0,2)(0,0,0)[0]             : AIC=inf, Time=0.09 sec
 ARIMA(1,0,1)(0,0,0)[0] intercept   : AIC=inf, Time=0.09 sec

Best model:  ARIMA(1,0,1)(0,0,0)[0]          
Total fit time: 0.541 seconds
Performing stepwise search to minimize aic
 ARIMA(1,0,1)(0,0,0)[0]             : AIC=59.071, Time=0.06 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=92.525, Time=0.00 sec
 ARIMA(1,0,0)(0,0,0)[0]             : AIC=inf, Time=0.01 sec
 ARIMA(0,0,1)(0,0,0)[0]             : AIC=inf, Time=0.02 sec
 ARIMA(2,0,1)(0,0,0)[0]             : AIC=62.543, Time=0.08 sec


  forecast_results[suburb] = forecast


 ARIMA(1,0,2)(0,0,0)[0]             : AIC=61.069, Time=0.08 sec
 ARIMA(0,0,2)(0,0,0)[0]             : AIC=inf, Time=0.04 sec
 ARIMA(2,0,0)(0,0,0)[0]             : AIC=inf, Time=0.04 sec
 ARIMA(2,0,2)(0,0,0)[0]             : AIC=inf, Time=0.17 sec
 ARIMA(1,0,1)(0,0,0)[0] intercept   : AIC=inf, Time=0.09 sec

Best model:  ARIMA(1,0,1)(0,0,0)[0]          
Total fit time: 0.591 seconds
Performing stepwise search to minimize aic
 ARIMA(1,0,1)(0,0,0)[0]             : AIC=66.104, Time=0.06 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=92.266, Time=0.00 sec
 ARIMA(1,0,0)(0,0,0)[0]             : AIC=inf, Time=0.02 sec


  forecast_results[suburb] = forecast


 ARIMA(0,0,1)(0,0,0)[0]             : AIC=inf, Time=0.03 sec
 ARIMA(2,0,1)(0,0,0)[0]             : AIC=68.254, Time=0.08 sec
 ARIMA(1,0,2)(0,0,0)[0]             : AIC=68.070, Time=0.05 sec
 ARIMA(0,0,2)(0,0,0)[0]             : AIC=inf, Time=0.05 sec
 ARIMA(2,0,0)(0,0,0)[0]             : AIC=inf, Time=0.03 sec
 ARIMA(2,0,2)(0,0,0)[0]             : AIC=inf, Time=0.20 sec
 ARIMA(1,0,1)(0,0,0)[0] intercept   : AIC=67.466, Time=0.08 sec

Best model:  ARIMA(1,0,1)(0,0,0)[0]          
Total fit time: 0.605 seconds
Performing stepwise search to minimize aic
 ARIMA(1,0,1)(0,0,0)[0]             : AIC=71.658, Time=0.03 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=97.087, Time=0.00 sec
 ARIMA(1,0,0)(0,0,0)[0]             : AIC=inf, Time=0.01 sec
 ARIMA(0,0,1)(0,0,0)[0]             : AIC=inf, Time=0.02 sec


  forecast_results[suburb] = forecast


 ARIMA(2,0,1)(0,0,0)[0]             : AIC=74.982, Time=0.09 sec
 ARIMA(1,0,2)(0,0,0)[0]             : AIC=inf, Time=0.09 sec
 ARIMA(0,0,2)(0,0,0)[0]             : AIC=inf, Time=0.04 sec
 ARIMA(2,0,0)(0,0,0)[0]             : AIC=inf, Time=0.02 sec
 ARIMA(2,0,2)(0,0,0)[0]             : AIC=inf, Time=0.11 sec
 ARIMA(1,0,1)(0,0,0)[0] intercept   : AIC=inf, Time=0.11 sec

Best model:  ARIMA(1,0,1)(0,0,0)[0]          
Total fit time: 0.542 seconds


  forecast_results[suburb] = forecast


In [146]:
forecast_results

Unnamed: 0,Albert Park-Middle Park-West St Kilda,Armadale,Carlton North,Carlton-Parkville,Collingwood-Abbotsford,Docklands,East Melbourne,East St Kilda,Elwood,Fitzroy,...,Morwell,Portland,Sale-Maffra,Seymour,Shepparton,Swan Hill,Torquay,Wangaratta,Warragul,Warrnambool
2024,662.3929,627.232853,559.0,3527.407058,1327.351207,1664.958333,529.958333,1082.25,1030.689932,685.529413,...,424.6243,353.649032,609.440981,250.745555,900.895833,312.251606,587.989719,456.95294,468.010448,642.678086
2025,662.054238,626.920917,559.0,3498.165592,1321.733193,1664.958333,529.958333,1082.25,1030.689782,675.481535,...,423.605798,363.013663,608.043577,248.329336,900.895833,312.251536,586.424173,456.952853,467.096097,641.38012
2026,661.71575,626.609136,559.0,3469.166532,1316.138958,1664.958333,529.958333,1082.25,1030.689633,669.159923,...,422.589739,371.356162,606.649377,246.535446,900.895833,312.251466,584.862796,456.952766,466.183532,640.084775


In [174]:
# Make the final result more useful: rows as suburbs, and columns as the years (2024, 2025, 2026)
forecast_results.index = [2024, 2025, 2026]
final_forecast = forecast_results.T
final_forecast



Unnamed: 0,2024,2025,2026
Albert Park-Middle Park-West St Kilda,662.392900,662.054238,661.715750
Armadale,627.232853,626.920917,626.609136
Carlton North,559.000000,559.000000,559.000000
Carlton-Parkville,3527.407058,3498.165592,3469.166532
Collingwood-Abbotsford,1327.351207,1321.733193,1316.138958
...,...,...,...
Swan Hill,312.251606,312.251536,312.251466
Torquay,587.989719,586.424173,584.862796
Wangaratta,456.952940,456.952853,456.952766
Warragul,468.010448,467.096097,466.183532


In [175]:
# Calculate the average rental price across the years 2024, 2025, and 2026
final_forecast['avg_rental_2024_to_2026'] = final_forecast.mean(axis=1)

# Sort by the average price and get the top 10 suburbs
top_10_suburbs_avg = final_forecast.sort_values(by='avg_rental_2024_to_2026', ascending=False).head(10)

# Display the result
top_10_suburbs_avg


Unnamed: 0,2024,2025,2026,avg_rental_2024_to_2026
Werribee-Hoppers Crossing,4085.880224,3560.705838,3300.655269,3649.080444
Carlton-Parkville,3527.407058,3498.165592,3469.166532,3498.246394
Altona,2807.442476,2681.210651,2777.524935,2755.392687
South Yarra,2263.5,2263.5,2263.5,2263.5
Cranbourne,2079.5625,2079.5625,2079.5625,2079.5625
Sydenham,1796.151592,1759.151674,1727.930611,1761.077959
Bundoora-Greensborough-Hurstbridge,1732.891004,1732.236118,1731.581481,1732.236201
Docklands,1664.958333,1664.958333,1664.958333,1664.958333
Box Hill,1622.407397,1621.137116,1619.867831,1621.137448
Essendon,1432.747354,1432.325398,1431.903565,1432.325439
