In [28]:
import pandas as pd

# Load the three input tables, one DataFrame each:
#   - the 2024 rent extract and the historical rent extract (landing folder)
#   - the Australian postcode lookup (raw/external folder)
history_rent = pd.read_csv(filepath_or_buffer='../data/landing/history_rent.csv')
recent_rent = pd.read_csv(filepath_or_buffer='../data/landing/2024_rent.csv')
postcode = pd.read_csv(filepath_or_buffer='../data/raw/external/australian-postcodes.csv')

In [29]:
# Restrict the postcode lookup to rows whose 'State' value is exactly 'VIC'.
is_vic = postcode['State'].eq('VIC')
postcode = postcode.loc[is_vic]

# Preview the first three remaining rows.
postcode.head(3)

Unnamed: 0,Postcode,Suburb,State,Lat,Lon
5584,3000,Melbourne,VIC,-37.81,144.97
5585,3001,Melbourne,VIC,-38.37,144.77
5586,3002,East Melbourne,VIC,-37.82,144.99


In [30]:
# A single 'Location' value can list several names joined by '-'.
# Split on '-' and explode so that every name gets its own row; all other
# columns (e.g. Count, Median) are repeated unchanged on each resulting row.
# NOTE(review): a name that itself contains '-' would also be split — confirm
# that no such names occur in the source data.
history_rent = (
    history_rent
    .assign(Location=history_rent['Location'].str.split('-'))
    .explode('Location')
    .reset_index(drop=True)
    # Trim whitespace left around each piece (e.g. from a ' - ' separator) so
    # the names can later be matched exactly against other tables.
    .assign(Location=lambda df: df['Location'].str.strip())
)

In [31]:
# Remove the 'Zone' column from the historical data.
# errors='ignore' keeps this cell re-runnable: after the first execution the
# column is already gone and a plain drop would raise KeyError.
history_rent = history_rent.drop(columns=['Zone'], errors='ignore')

# Preview the first three rows after the drop.
history_rent.head(3)

Unnamed: 0,Location,Year,Month,Count,Median,Bed,Apartment
0,Albert Park,2000,Dec,369,175,1,1
1,Middle Park,2000,Dec,369,175,1,1
2,West St Kilda,2000,Dec,369,175,1,1


In [32]:
# Stack the historical rows on top of the recent rows (fresh 0..n-1 index),
# then expose the 'Location' column under the name 'Suburb'.
rent = (
    pd.concat([history_rent, recent_rent], axis=0, ignore_index=True)
    .rename(columns={'Location': 'Suburb'})
)

In [33]:
# Normalise 'Median' to whole-number integers, step by step:
#   1. to_numeric(errors='coerce') - entries that cannot be parsed become NaN
#   2. round()                     - round to the nearest whole number
#   3. fillna(0)                   - NaN cannot be cast to int, so replace it with 0
#   4. astype(int)                 - final integer dtype
# NOTE(review): after this step a Median of 0 is indistinguishable from a
# missing/unparseable value — confirm downstream code treats 0 as "no data".
rent['Median'] = (
    pd.to_numeric(rent['Median'], errors='coerce')
    .round()
    .fillna(0)
    .astype(int)
)

# Preview the first three rows of the cleaned table.
rent.head(3)

Unnamed: 0,Suburb,Year,Month,Count,Median,Bed,Apartment
0,Albert Park,2000,Dec,369,175,1,1
1,Middle Park,2000,Dec,369,175,1,1
2,West St Kilda,2000,Dec,369,175,1,1


In [34]:
# Destination of the combined rent table.
file_path = '../data/landing/rent.csv'

# Write the table as CSV; index=False leaves the row index out of the file.
rent.to_csv(path_or_buf=file_path, index=False)