
# Property Forecaster Analysis

This notebook analyzes the prescience of three property market forecasters (Westpac, Joe Bloggs, and Harry Spent) 
and evaluates their accuracy in predicting median house price changes over the years.


In [1]:

import pandas as pd

# Load the data
file_path = 'forecast_history.csv'  
forecast_data = pd.read_csv(file_path)

# Display the first few rows
forecast_data.head()




Unnamed: 0.1,Unnamed: 0,Median house price,Westpac: 4 year forecast,Joe Bloggs: 2 year forecast,Harry Spent: 5 year forecast
0,2011,340000,56%,23%,-20%
1,2012,370000,53%,34$,-80%
2,2013,350000,,19%,-70%
3,2014,420000,13%,42%,-80%
4,2015,425000,33%,23%,-50%


In [2]:

# Clean the data by removing non-numeric characters and convert to numeric values

forecast_data.columns = ['Year', 'Median house price', 'Westpac forecast', 'Joe Bloggs forecast', 'Harry Spent forecast']

# Replacing erroneous entries in the forecasts
forecast_data['Westpac forecast'] = forecast_data['Westpac forecast'].replace('I5%', '15%')
forecast_data['Joe Bloggs forecast'] = forecast_data['Joe Bloggs forecast'].str.replace('$', '')

# Convert forecast columns to numeric values
forecast_data['Westpac forecast'] = forecast_data['Westpac forecast'].str.replace('%', '').astype(float)
forecast_data['Joe Bloggs forecast'] = forecast_data['Joe Bloggs forecast'].str.replace('%', '').astype(float)
forecast_data['Harry Spent forecast'] = forecast_data['Harry Spent forecast'].str.replace('%', '').astype(float)

# Convert 'Median house price' to numeric
forecast_data['Median house price'] = pd.to_numeric(forecast_data['Median house price'], errors='coerce')

# Calculate year-over-year percentage change in median house prices
forecast_data['Actual change'] = forecast_data['Median house price'].pct_change() * 100

# Drop the first row due to lack of previous year data for percentage change
forecast_data.dropna(subset=['Actual change'], inplace=True)

forecast_data.head()


Unnamed: 0,Year,Median house price,Westpac forecast,Joe Bloggs forecast,Harry Spent forecast,Actual change
1,2012,370000.0,53.0,34.0,-80.0,8.823529
2,2013,350000.0,,19.0,-70.0,-5.405405
3,2014,420000.0,13.0,42.0,-80.0,20.0
4,2015,425000.0,33.0,23.0,-50.0,1.190476
5,2016,500000.0,-5.0,15.0,-90.0,17.647059


In [4]:

# Calculate absolute error for each forecaster
forecast_data['Westpac error'] = abs(forecast_data['Actual change'] - forecast_data['Westpac forecast'])
forecast_data['Joe Bloggs error'] = abs(forecast_data['Actual change'] - forecast_data['Joe Bloggs forecast'])
forecast_data['Harry Spent error'] = abs(forecast_data['Actual change'] - forecast_data['Harry Spent forecast'])

# Display the cleaned data with errors
forecast_data[['Year', 'Median house price', 'Actual change', 'Westpac error', 'Joe Bloggs error', 'Harry Spent error']]


Unnamed: 0,Year,Median house price,Actual change,Westpac error,Joe Bloggs error,Harry Spent error
1,2012,370000.0,8.823529,44.176471,25.176471,88.823529
2,2013,350000.0,-5.405405,,24.405405,64.594595
3,2014,420000.0,20.0,7.0,22.0,100.0
4,2015,425000.0,1.190476,31.809524,21.809524,51.190476
5,2016,500000.0,17.647059,22.647059,2.647059,107.647059
6,2017,520000.0,4.0,41.0,1496.0,34.0
7,2018,550000.0,5.769231,28.230769,12.230769,
8,2019,596000.0,8.363636,25.636364,10.636364,118.363636
9,2020,610000.0,2.348993,17.651007,20.651007,92.348993
10,2021,660000.0,8.196721,28.196721,4.803279,68.196721


In [5]:

# Summarize the average errors for each forecaster over the years
summary = {
    'Forecaster': ['Westpac', 'Joe Bloggs', 'Harry Spent'],
    'Average Error': [
        forecast_data['Westpac error'].mean(),
        forecast_data['Joe Bloggs error'].mean(),
        forecast_data['Harry Spent error'].mean()
    ]
}

summary_df = pd.DataFrame(summary)

# Display the summary
summary_df


Unnamed: 0,Forecaster,Average Error
0,Westpac,30.593176
1,Joe Bloggs,133.870146
2,Harry Spent,74.426419


In [6]:

# Trend Accuracy Calculation
# Compare the direction (increase or decrease) of the actual change vs each forecaster's prediction

# Define a function to check if the trend matches
def trend_accuracy(actual, forecast):
    actual_trend = actual > 0
    forecast_trend = forecast > 0
    return actual_trend == forecast_trend

# Apply the trend accuracy check for each forecaster
forecast_data['Westpac trend accuracy'] = forecast_data.apply(lambda row: trend_accuracy(row['Actual change'], row['Westpac forecast']), axis=1)
forecast_data['Joe Bloggs trend accuracy'] = forecast_data.apply(lambda row: trend_accuracy(row['Actual change'], row['Joe Bloggs forecast']), axis=1)
forecast_data['Harry Spent trend accuracy'] = forecast_data.apply(lambda row: trend_accuracy(row['Actual change'], row['Harry Spent forecast']), axis=1)

# Calculate the percentage of years where each forecaster predicted the correct trend
trend_summary = {
    'Forecaster': ['Westpac', 'Joe Bloggs', 'Harry Spent'],
    'Trend Accuracy (%)': [
        forecast_data['Westpac trend accuracy'].mean() * 100,
        forecast_data['Joe Bloggs trend accuracy'].mean() * 100,
        forecast_data['Harry Spent trend accuracy'].mean() * 100
    ]
}

trend_summary_df = pd.DataFrame(trend_summary)
trend_summary_df


Unnamed: 0,Forecaster,Trend Accuracy (%)
0,Westpac,76.923077
1,Joe Bloggs,84.615385
2,Harry Spent,23.076923
