> ## Median Price by Suburb

> ### Import libraries and functions

In [55]:
import pandas as pd

> ### Rental price by suburb

In [56]:
# Get the preprocessed property data
property_df = pd.read_csv('../data/raw/preprocessed properties.csv')

In [57]:
# Define the interested combinations of property types and bedrooms
combinations = [
    ('Apartment / Unit / Flat', 1),
    ('Apartment / Unit / Flat', 2),
    ('Apartment / Unit / Flat', 3),
    ('House', 2),
    ('House', 3),
    ('House', 4)
]

median_price_df = pd.DataFrame()

# Compute median price for the above combinations by suburb
for property_type, bedrooms in combinations:

    # Filter out properties of the current combination
    filtered_df = property_df[(property_df['property type'] == property_type) 
                              & (property_df['bedrooms'] == bedrooms)]
    
    # Compute the median rental price
    median_price = filtered_df.groupby('postcode')['price (AUD per week)'].median().rename(
                                            f'median {bedrooms} bedroom {property_type}')
    median_price_df = pd.concat([median_price_df, median_price], axis=1)

In [58]:
# Compute the median rental price for all properties by suburb
median_price_all = property_df.groupby(['postcode', 'suburb'])['price (AUD per week)'].mean().rename(
                                                        'median all properties')

# Combining the median price for all properties and for the properties of interest
result_df = median_price_all.to_frame().join(median_price_df, on='postcode', how='left')
result_df.reset_index(inplace=True)

In [59]:
# Show the final df
result_df

Unnamed: 0,postcode,suburb,median all properties,median 1 bedroom Apartment / Unit / Flat,median 2 bedroom Apartment / Unit / Flat,median 3 bedroom Apartment / Unit / Flat,median 2 bedroom House,median 3 bedroom House,median 4 bedroom House
0,3000,MELBOURNE,602.659574,527.5,625.0,,,,
1,3002,EAST MELBOURNE,718.214286,475.0,740.0,,625.0,800.0,
2,3003,WEST MELBOURNE,673.750000,520.0,650.0,1100.0,690.0,,
3,3004,MELBOURNE,788.181818,550.0,752.5,1025.0,,,
4,3006,SOUTHBANK,685.000000,540.0,700.0,1100.0,830.0,825.0,
...,...,...,...,...,...,...,...,...,...
151,3936,SAFETY BEACH,700.000000,,,,,,700.0
152,3939,ROSEBUD,720.000000,,,,,720.0,
153,3941,RYE,650.000000,,,,,625.0,
154,3941,TOOTGAROOK,600.000000,,,,,625.0,


In [60]:
# Save the final df
result_df.to_csv('../data/raw/median price per postcode.csv', index=False)

> ### Aggregate with historical data

In [37]:
# Read the CSV file for 2024 median price
median_2024 = pd.read_csv('../data/raw/median price per postcode.csv')

# Read the cleaned historical CSV files
one_bed_flat = pd.read_csv('../data/raw/historical/cleaned 1 bedroom flat.csv')
two_bed_flat = pd.read_csv('../data/raw/historical/cleaned 2 bedroom flat.csv')
three_bed_flat = pd.read_csv('../data/raw/historical/cleaned 3 bedroom flat.csv')
two_bed_house = pd.read_csv('../data/raw/historical/cleaned 2 bedroom house.csv')
three_bed_house = pd.read_csv('../data/raw/historical/cleaned 3 bedroom house.csv')
four_bed_house = pd.read_csv('../data/raw/historical/cleaned 4 bedroom house.csv')
all_properties = pd.read_csv('../data/raw/historical/cleaned All properties.csv')

In [38]:
# Get the types of properties in the current 2024 median price
columns_2024 = median_2024.columns[2:].tolist()

columns_2024

['median all properties',
 'median 1 bedroom Apartment / Unit / Flat',
 'median 2 bedroom Apartment / Unit / Flat',
 'median 3 bedroom Apartment / Unit / Flat',
 'median 2 bedroom House',
 'median 3 bedroom House',
 'median 4 bedroom House']

In [39]:
# Make a list of historical dataframes to be loop through
historical_data = [all_properties, one_bed_flat, two_bed_flat, three_bed_flat, 
                   two_bed_house, three_bed_house, four_bed_house]

In [40]:
# Make the suburbs to lowercase to merge with other historical dataframes
median_2024['suburb'] = median_2024['suburb'].str.lower()

# The 'Melbourne' suburb is 'CBD' in the historical data
# So replace 'Melbourne' by 'CBD' to merge it with historical data
median_2024['suburb'] = median_2024['suburb'].replace('melbourne', 'cbd')


In [53]:
# Start merging
merged_dataframes = []
for i in range(len(historical_data)):
    historical_data[i]['suburb'] = historical_data[i]['suburb'].str.lower()
    merged_df = historical_data[i].merge(median_2024[['postcode', 'suburb', columns_2024[i]]], 
                                         on='suburb', how='left')
    
    # Reorder to make 'postcode' the first column in the merged df
    merged_df = merged_df[['postcode'] + [col for col in merged_df.columns if col != 'postcode']]

    # Rename the new merged column
    merged_df.columns.values[-1] = 'Sep 2024'
    
    merged_dataframes.append(merged_df)

In [54]:
merged_dataframes[0]

Unnamed: 0,postcode,suburb,Mar 2000,Jun 2000,Sep 2000,Dec 2000,Mar 2001,Jun 2001,Sep 2001,Dec 2001,...,Mar 2021,Jun 2021,Sep 2021,Dec 2021,Mar 2022,Jun 2022,Sep 2022,Dec 2022,Mar 2023,Sep 2024
0,3206.0,albert park,260,260,270,275,275,280,280,290,...,500,500,500,495,500,515,500,525,545,1284.166667
1,3206.0,middle park,260,260,270,275,275,280,280,290,...,500,500,500,495,500,515,500,525,545,625.000000
2,,west st kilda,260,260,270,275,275,280,280,290,...,500,500,500,495,500,515,500,525,545,
3,3143.0,armadale,200,200,205,210,215,220,225,230,...,450,440,425,420,430,450,450,460,490,836.428571
4,3054.0,carlton north,260,260,265,270,270,275,280,280,...,580,575,575,580,580,595,600,600,620,735.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
154,,narre warren,165,165,170,170,170,175,175,175,...,385,390,395,400,405,410,420,430,440,
155,,hampton park,165,165,170,170,170,175,175,175,...,385,390,395,400,405,410,420,430,440,
156,3174.0,noble park,140,140,140,145,145,145,145,145,...,360,355,360,360,360,370,375,380,385,360.000000
157,,pakenham,155,155,160,160,160,165,165,165,...,360,365,370,375,380,390,400,410,420,


In [None]:


# Step 2: Find matching suburbs (suburbs in both DataFrames)
matching_suburbs = pd.merge(dataframe_list[-1], median_2024, on='suburb', how='inner')

# Step 3: Find non-matching suburbs in each DataFrame
non_matching_df1 = dataframe_list[-1][~dataframe_list[-1]['suburb'].isin(median_2024['suburb'])]  # Suburbs in df1 but not in df2
non_matching_df2 = median_2024[~median_2024['suburb'].isin(dataframe_list[-1]['suburb'])]  # Suburbs in df2 but not in df1

# Print results
print("Matching suburbs:")
print(matching_suburbs)

Matching suburbs:
           suburb  Mar 2000  Jun 2000  Sep 2000  Dec 2000  Mar 2001  Jun 2001  \
0     albert park       260       260       270       275       275       280   
1     middle park       260       260       270       275       275       280   
2        armadale       200       200       205       210       215       220   
3   carlton north       260       260       265       270       270       275   
4         carlton       260       260       260       260       260       260   
..            ...       ...       ...       ...       ...       ...       ...   
72        preston       165       170       170       171       175       175   
73      reservoir       160       160       165       165       165       165   
74      thornbury       150       150       160       160       160       165   
75     noble park       140       140       140       145       145       145   
76     springvale       145       145       145       150       150       150   

    Sep 2

In [None]:
print("\nSuburbs in df1 but not in df2:")
print(non_matching_df1['suburb'])


Suburbs in df1 but not in df2:
2        west st kilda
7                  cbd
8          st kilda rd
13       east st kilda
32              balwyn
            ...       
151    dandenong north
152    endeavour hills
153       narre warren
154       hampton park
156           pakenham
Name: suburb, Length: 81, dtype: object


In [None]:
print("\nSuburbs in df2 but not in df1:")
print(non_matching_df2['suburb'])


Suburbs in df2 but not in df1:
0             melbourne
3             melbourne
9      south kingsville
12               albion
13             cairnlea
             ...       
151        safety beach
152             rosebud
153                 rye
154          tootgarook
155         clyde north
Name: suburb, Length: 79, dtype: object
