<h2>A Linear Regression model and an XGBoost model are used to predict the yearly median rent for the next 5 years (2025–2029).</h2>

<h2>Linear Regression Model</h2>

In [5]:
import pandas as pd
from sklearn.linear_model import LinearRegression

df_linear = pd.read_csv('../data/curated/model_predict.csv')

# Features chosen in feature_selection.ipynb.
# '2020_rent_median' and '2021_rent_median' used to appear twice in this list;
# each feature is now listed exactly once (first-occurrence order kept) so the
# design matrix contains no exactly duplicated columns.
rent_median_features = ['2018_rent_median', '2011_rent_count', '2020_rent_count',
                        '2011_rent_median', '2020_rent_median', '2017_rent_median',
                        '2003_rent_count', '2010_rent_count', '2022_rent_count',
                        '2021_rent_count', '2016_rent_median',
                        '2021_rent_median', '2015_rent_count', '2009_rent_median',
                        '2015_rent_median', 'income_growth_rate', '2007_rent_count',
                        '2010_rent_median', '2006_rent_median',
                        '2006_population', '2005_rent_median', '2012_rent_median',
                        '2007_rent_median', '2013_rent_median',
                        '2004_rent_median', '2002_rent_median']

# Relative change of the 2023.5 rent median with respect to the 2022 rent median
df_linear['2023.5_rent_median_growth'] = (df_linear['2023.5_rent_median'] - df_linear['2022_rent_median']) / df_linear['2022_rent_median']

# Create the Linear Regression model; the same estimator object is refit for every forecast year
model_median = LinearRegression()

# The feature columns never change between forecast years, so build the matrix once
X_median = df_linear[rent_median_features]
X_median_np = X_median.to_numpy()

# NOTE(review): every iteration fits on the same rows and features and, from 2026 on,
# the target is the previous iteration's own fitted values. Ordinary least squares
# reproduces its fitted values when refit on them, so the 2026-2029 predictions are
# expected to equal the 2025 prediction (growth ~ 0) - confirm this is the intended scheme.
for year in [2025, 2026, 2027, 2028, 2029]:
    # Baseline column: the 2023.5 data for the first forecast year, otherwise the
    # previous year's prediction. It is both the regression target and the growth reference.
    baseline_column = '2023.5_rent_median' if year == 2025 else f'{year-1}_rent_median'

    # 1-D target so that predict() returns a 1-D array matching a single column
    y_median_np = df_linear[baseline_column].to_numpy()

    # Fit on the baseline and store the in-sample prediction as this year's rent median
    model_median.fit(X_median_np, y_median_np)
    df_linear[f'{year}_rent_median'] = model_median.predict(X_median_np)

    # Growth relative to the baseline column
    # NOTE(review): for 2025 the baseline is labelled 2023.5, presumably a gap longer than one year - confirm
    df_linear[f'{year}_rent_median_growth'] = (df_linear[f'{year}_rent_median'] - df_linear[baseline_column]) / df_linear[baseline_column]

# NOTE: the file name says 2025_2027 although the frame holds predictions for 2025-2029;
# the path is left unchanged so anything that already reads this file keeps working.
df_linear.to_csv('../data/curated/LinearModel_predict_2025_2027.csv', index=False)

print(df_linear.head(3))


                                  suburb  postcode        SA2  \
0  Albert Park-Middle Park-West St Kilda    3004.0  206041119   
1  Albert Park-Middle Park-West St Kilda    3004.0  206041125   
2  Albert Park-Middle Park-West St Kilda    3004.0  206051128   

             SA2 NAME                                           geometry  \
0      East Melbourne  POLYGON ((144.9739585748236 -37.81330241814959...   
1  South Yarra - West  POLYGON ((144.97455041239857 -37.8346653683224...   
2         Albert Park  POLYGON ((144.96766973330057 -37.8373669078686...   

   2000_rent_count  2000_rent_median  2001_rent_count  2001_rent_median  \
0           4632.0            266.25           4990.0            281.25   
1           4632.0            266.25           4990.0            281.25   
2           4632.0            266.25           4990.0            281.25   

   2002_rent_count  ...  2025_rent_median  2025_rent_median_growth  \
0             4097  ...        584.919684                 0.121

In [9]:
# Average the five predicted yearly growth rates of every row and express it in percent.
# NOTE(review): the column is named 'max_growth_rate', but the aggregation below is a mean
# over 2025-2029, not a maximum - confirm which one is intended.
growth_columns = ['2025_rent_median_growth', '2026_rent_median_growth', '2027_rent_median_growth',
                  '2028_rent_median_growth', '2029_rent_median_growth']
average_growth_percent = df_linear[growth_columns].mean(axis=1) * 100
df_linear['max_growth_rate'] = average_growth_percent

# Ten rows with the highest value in 'max_growth_rate', reduced to the columns of interest
ranked_rows = df_linear.nlargest(10, 'max_growth_rate')
top_10_suburbs = ranked_rows[['suburb', 'postcode', 'max_growth_rate']]
top_10_suburbs

Unnamed: 0,suburb,postcode,max_growth_rate
208,Wodonga,3690.0,16.602522
186,Horsham,3400.0,10.773718
19,Flemington-Kensington,3031.0,8.006055
77,Newport-Spotswood,3015.0,7.86175
20,Flemington-Kensington,3031.0,7.595238
110,West Brunswick,3055.0,7.163723
170,Mount Clear-Buninyong,3350.0,7.154636
92,Yarraville-Seddon,3013.0,7.13876
181,Benalla,3671.0,6.834704
169,Mount Clear-Buninyong,3350.0,6.789846


<h2>XGBoost Model</h2>

In [10]:
import pandas as pd
import xgboost as xgb

df = pd.read_csv('../data/curated/model_predict.csv')

# Features chosen in feature_selection.ipynb.
# '2020_rent_median' and '2021_rent_median' used to appear twice in this list;
# each feature is now listed exactly once (first-occurrence order kept).
rent_median_features = ['2018_rent_median', '2011_rent_count', '2020_rent_count',
                        '2011_rent_median', '2020_rent_median', '2017_rent_median',
                        '2003_rent_count', '2010_rent_count', '2022_rent_count',
                        '2021_rent_count', '2016_rent_median',
                        '2021_rent_median', '2015_rent_count', '2009_rent_median',
                        '2015_rent_median', 'income_growth_rate', '2007_rent_count',
                        '2010_rent_median', '2006_rent_median',
                        '2006_population', '2005_rent_median', '2012_rent_median',
                        '2007_rent_median', '2013_rent_median',
                        '2004_rent_median', '2002_rent_median']


# Relative change of the 2023.5 rent median with respect to the 2022 rent median
df['2023.5_rent_median_growth'] = (df['2023.5_rent_median'] - df['2022_rent_median']) / df['2022_rent_median']

# The feature columns never change between forecast years, so build the matrix once
X_median = df[rent_median_features]
X_median_np = X_median.to_numpy()

# NOTE(review): from 2026 on, each model is trained on the previous model's predictions
# over the same rows and the same features - confirm this is the intended forecasting scheme.
for year in [2025, 2026, 2027, 2028, 2029]:
    # Baseline column: the 2023.5 data for the first forecast year, otherwise the
    # previous year's prediction. It is both the training target and the growth reference.
    baseline_column = '2023.5_rent_median' if year == 2025 else f'{year-1}_rent_median'

    # Always a 1-D target (the 2025 branch used to pass a one-column frame instead)
    y_median_np = df[baseline_column].to_numpy()

    # A fresh, seeded regressor is trained for every forecast year
    model_median = xgb.XGBRegressor(n_estimators=100, learning_rate=0.05, max_depth=6, random_state=42)
    model_median.fit(X_median_np, y_median_np)
    df[f'{year}_rent_median'] = model_median.predict(X_median_np)

    # Growth relative to the baseline column
    # NOTE(review): for 2025 the baseline is labelled 2023.5, presumably a gap longer than one year - confirm
    df[f'{year}_rent_median_growth'] = (df[f'{year}_rent_median'] - df[baseline_column]) / df[baseline_column]

# NOTE: the file name says 2025_2027 although the frame holds predictions for 2025-2029;
# the path is left unchanged so anything that already reads this file keeps working.
df.to_csv('../data/curated/XGmodel_predict_2025_2027.csv', index=False)

print(df.head(3))


                                  suburb  postcode        SA2  \
0  Albert Park-Middle Park-West St Kilda    3004.0  206041119   
1  Albert Park-Middle Park-West St Kilda    3004.0  206041125   
2  Albert Park-Middle Park-West St Kilda    3004.0  206051128   

             SA2 NAME                                           geometry  \
0      East Melbourne  POLYGON ((144.9739585748236 -37.81330241814959...   
1  South Yarra - West  POLYGON ((144.97455041239857 -37.8346653683224...   
2         Albert Park  POLYGON ((144.96766973330057 -37.8373669078686...   

   2000_rent_count  2000_rent_median  2001_rent_count  2001_rent_median  \
0           4632.0            266.25           4990.0            281.25   
1           4632.0            266.25           4990.0            281.25   
2           4632.0            266.25           4990.0            281.25   

   2002_rent_count  ...  2025_rent_median  2025_rent_median_growth  \
0             4097  ...        524.634094                 0.006

In [11]:
# Average the five predicted yearly growth rates of every row and express it in percent.
# NOTE(review): the column is named 'max_growth_rate', but the aggregation below is a mean
# over 2025-2029, not a maximum - confirm which one is intended.
growth_columns = ['2025_rent_median_growth', '2026_rent_median_growth', '2027_rent_median_growth',
                  '2028_rent_median_growth', '2029_rent_median_growth']
average_growth_percent = df[growth_columns].mean(axis=1) * 100
df['max_growth_rate'] = average_growth_percent

# Ten rows with the highest value in 'max_growth_rate', reduced to the columns of interest
ranked_rows = df.nlargest(10, 'max_growth_rate')
top_10_suburbs = ranked_rows[['suburb', 'postcode', 'max_growth_rate']]
top_10_suburbs

Unnamed: 0,suburb,postcode,max_growth_rate
208,Wodonga,3690.0,5.755847
185,Hamilton,3300.0,5.035061
24,Prahran-Windsor,3181.0,4.948306
14,East St Kilda,3183.0,3.936006
74,Keilor East-Avondale Heights,3034.0,3.639436
118,Mill Park-Epping,3082.0,3.60524
6,Carlton-Parkville,3053.0,3.562472
43,Chadstone-Oakleigh,3166.0,3.52612
89,Werribee-Hoppers Crossing,3029.0,3.092071
104,Oak Park-Glenroy-Fawkner,3046.0,3.015459
