In [27]:
# dependencies
import pandas as pd
import scipy.stats as stats
import numpy as np
import matplotlib.pyplot as plt
import datetime as dt

In [28]:
# read in the aggregated data
aggregate_df = pd.read_csv('AGGREGATE.csv')

In [29]:
# create aggregate DataFrame
aggregate_df = pd.DataFrame(aggregate_df)

In [30]:
# create year column header to the aggregate DataFrame
aggregate_df['Year'] = pd.DatetimeIndex(aggregate_df['Date']).year


In [31]:
# remove the decimal from the year column
aggregate_df['Year'] = aggregate_df['Year'].fillna(0).astype(int)


In [32]:
# remove the colums that are not needed
aggregate_df = aggregate_df.drop(['Composite_HPI_SA','Single_Family_HPI_SA','One_Storey_HPI_SA','Two_Storey_HPI_SA','Townhouse_HPI_SA','Apartment_HPI_SA'], axis=1)

In [33]:
# remove the NaN values
aggregate_df = aggregate_df.dropna()

In [45]:
# create a growth rate headers
aggregate_df['composite_growth_rate'] = aggregate_df['Composite_Benchmark_SA'] / aggregate_df['Composite_Benchmark_SA'].shift(1) - 1
aggregate_df['single_family_growth_rate'] = aggregate_df['Single_Family_Benchmark_SA'] / aggregate_df['Single_Family_Benchmark_SA'].shift(1) - 1
aggregate_df['one_storey_growth_rate'] = aggregate_df['One_Storey_Benchmark_SA'] / aggregate_df['One_Storey_Benchmark_SA'].shift(1) - 1
aggregate_df['two_storey_growth_rate'] = aggregate_df['Two_Storey_Benchmark_SA'] / aggregate_df['Two_Storey_Benchmark_SA'].shift(1) - 1
aggregate_df['townhouse_growth_rate'] = aggregate_df['Townhouse_Benchmark_SA'] / aggregate_df['Townhouse_Benchmark_SA'].shift(1) - 1
aggregate_df['apartment_growth_rate'] = aggregate_df['Apartment_Benchmark_SA'] / aggregate_df['Apartment_Benchmark_SA'].shift(1) - 1


In [47]:
# create a DataFrame for just the year 2020 to 2023
aggregate_df_2020_2023 = aggregate_df[aggregate_df['Year'] >= 2020]


In [50]:
# create a DataFrame for just the year 2016 to 2019
aggregate_df_2016_2019 = aggregate_df[(aggregate_df['Year'] >= 2016) & (aggregate_df['Year'] <= 2019)]


In [52]:
# a paired t-test to compare the growth rate of the composite benchmark from 2016 to 2019 and 2020 to 2023
t_stat, p_value = stats.ttest_rel(aggregate_df_2016_2019['composite_growth_rate'], aggregate_df_2020_2023['composite_growth_rate'])
print(t_stat, p_value)

-1.1963707874161464 0.23755236328726412


In [53]:
# a paired t-test to compare the growth rate of the single family benchmark from 2016 to 2019 and 2020 to 2023
t_stat, p_value = stats.ttest_rel(aggregate_df_2016_2019['single_family_growth_rate'], aggregate_df_2020_2023['single_family_growth_rate'])
print(t_stat, p_value)

-1.519280075023866 0.13539101611072157


In [55]:
# a paired t-test to compare the growth rate of the one storey benchmark from 2016 to 2019 and 2020 to 2023
t_stat, p_value = stats.ttest_rel(aggregate_df_2016_2019['one_storey_growth_rate'], aggregate_df_2020_2023['one_storey_growth_rate'])
print(t_stat, p_value)

-1.320087765866604 0.1931990697202384


In [57]:
# a paired t-test to compare the growth rate of the two storey benchmark from 2016 to 2019 and 2020 to 2023
t_stat, p_value = stats.ttest_rel(aggregate_df_2016_2019['two_storey_growth_rate'], aggregate_df_2020_2023['two_storey_growth_rate'])
print(t_stat, p_value)

-1.5355695387006982 0.13134911510730138


In [58]:
# a paired t-test to compare the growth rate of the townhouse benchmark from 2016 to 2019 and 2020 to 2023
t_stat, p_value = stats.ttest_rel(aggregate_df_2016_2019['townhouse_growth_rate'], aggregate_df_2020_2023['townhouse_growth_rate'])
print(t_stat, p_value)

-0.09951500077629977 0.9211526362306504


In [59]:
# a paired t-test to compare the growth rate of the apartment benchmark from 2016 to 2019 and 2020 to 2023
t_stat, p_value = stats.ttest_rel(aggregate_df_2016_2019['apartment_growth_rate'], aggregate_df_2020_2023['apartment_growth_rate'])
print(t_stat, p_value)

1.5945306378686976 0.11752065970612852


In [60]:
# create a summary DataFrame for the test results
summary_df = pd.DataFrame({'Composite Benchmark': [t_stat, p_value],
                            'Single Family Benchmark': [t_stat, p_value],
                            'One Storey Benchmark': [t_stat, p_value],
                            'Two Storey Benchmark': [t_stat, p_value],
                            'Townhouse Benchmark': [t_stat, p_value],
                            'Apartment Benchmark': [t_stat, p_value]},
                            index=['t-statistic', 'p-value'])

# print the summary DataFrame
print(summary_df)

             Composite Benchmark  Single Family Benchmark  \
t-statistic             1.594531                 1.594531   
p-value                 0.117521                 0.117521   

             One Storey Benchmark  Two Storey Benchmark  Townhouse Benchmark  \
t-statistic              1.594531              1.594531             1.594531   
p-value                  0.117521              0.117521             0.117521   

             Apartment Benchmark  
t-statistic             1.594531  
p-value                 0.117521  
