In [465]:
import pandas as pd
import numpy as np

In [466]:
# Load the curated property table; the relative path is resolved against the
# notebook's current working directory.
df = pd.read_csv(filepath_or_buffer='../../data/curated/AllProperties.csv')

In [467]:
# Trim stray whitespace from the headers so the year-prefix test below sees
# clean column names. NOTE: this (and the numeric coercion in the loop)
# modifies `df` in place.
df.columns = df.columns.str.strip()

years = [2018, 2019, 2020, 2021, 2022, 2023]
df_weekly_rent = df[['Suburb']].copy()

for year in years:
    # Every column whose header starts with this year contributes to its figure.
    year_cols = [header for header in df.columns if header.startswith(f"{year}")]

    # Force the year's columns to numeric; unparseable cells become NaN.
    df[year_cols] = df[year_cols].apply(pd.to_numeric, errors='coerce')

    # Row-wise mean over the non-zero cells only: zeros are hidden as NaN and
    # then skipped by the mean.
    row_mean_nonzero = df[year_cols].replace(0, np.nan).mean(axis=1, skipna=True)

    # A row whose cells are all exactly 0 would average to NaN above; such rows
    # are reported as 0 instead.
    row_is_all_zero = df[year_cols].eq(0).all(axis=1)
    df_weekly_rent[f"{year}_weekly_rent"] = np.where(row_is_all_zero, 0, row_mean_nonzero)

df_weekly_rent

Unnamed: 0,Suburb,2018_weekly_rent,2019_weekly_rent,2020_weekly_rent,2021_weekly_rent,2022_weekly_rent,2023_weekly_rent
0,Albert Park-Middle Park-West St Kilda,737.375,720.875,702.750,711.250,684.375,665.5
1,Armadale,702.125,624.000,621.500,628.000,642.250,620.5
2,Carlton North,535.625,537.875,564.875,587.625,573.000,555.0
3,Carlton-Parkville,2418.875,2458.875,2663.875,2615.250,3372.250,3545.0
4,CBD-St Kilda Rd,5352.125,5629.750,6095.625,8814.125,9021.500,8963.0
...,...,...,...,...,...,...,...
141,Traralgon,691.375,621.500,594.375,593.750,633.500,644.0
142,Wanagaratta,451.875,460.375,473.125,450.250,457.375,449.0
143,Warragul,398.875,420.625,465.875,459.750,465.250,468.5
144,Warrnambool,751.125,695.750,663.000,614.750,633.375,643.5


In [468]:
# Copy of the weekly-rent frame extended with one placeholder column per
# population year; every placeholder cell starts at 0.
population_years = [2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025, 2026]
df_new = df_weekly_rent.assign(
    **{f"{year}_population": 0 for year in population_years}
)

df_new

Unnamed: 0,Suburb,2018_weekly_rent,2019_weekly_rent,2020_weekly_rent,2021_weekly_rent,2022_weekly_rent,2023_weekly_rent,2018_population,2019_population,2020_population,2021_population,2022_population,2023_population,2024_population,2025_population,2026_population
0,Albert Park-Middle Park-West St Kilda,737.375,720.875,702.750,711.250,684.375,665.5,0,0,0,0,0,0,0,0,0
1,Armadale,702.125,624.000,621.500,628.000,642.250,620.5,0,0,0,0,0,0,0,0,0
2,Carlton North,535.625,537.875,564.875,587.625,573.000,555.0,0,0,0,0,0,0,0,0,0
3,Carlton-Parkville,2418.875,2458.875,2663.875,2615.250,3372.250,3545.0,0,0,0,0,0,0,0,0,0
4,CBD-St Kilda Rd,5352.125,5629.750,6095.625,8814.125,9021.500,8963.0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
141,Traralgon,691.375,621.500,594.375,593.750,633.500,644.0,0,0,0,0,0,0,0,0,0
142,Wanagaratta,451.875,460.375,473.125,450.250,457.375,449.0,0,0,0,0,0,0,0,0,0
143,Warragul,398.875,420.625,465.875,459.750,465.250,468.5,0,0,0,0,0,0,0,0,0
144,Warrnambool,751.125,695.750,663.000,614.750,633.375,643.5,0,0,0,0,0,0,0,0,0


In [469]:
# Sanity check: does any numeric column of df_weekly_rent contain an exact 0?
numeric_cols = df_weekly_rent.select_dtypes(include=['number']).columns
zero_cells = df_weekly_rent[numeric_cols].eq(0)
has_zeros = zero_cells.values.any()

print(f"Does df_weekly_rent have any zero values? {has_zeros}")

Does df_weekly_rent have any zero values? False


In [470]:
# Curated population and income tables (file names indicate they are keyed by SA2).
df_population = pd.read_csv(filepath_or_buffer='../../data/curated/population_by_sa2.csv')
df_income = pd.read_csv(filepath_or_buffer='../../data/curated/income_by_sa2.csv')

In [471]:
# Each line of the file becomes one entry of `missing_suburbs`.
with open('missing_suburbs.txt', 'r') as suburbs_file:
    missing_suburbs = suburbs_file.read().splitlines()

#### Add population data to `df_new` for each suburb

In [472]:
# Hand-written (population label, rental suburb) pairs. These pin a label to
# one specific suburb, either because the two names differ too much for a
# substring test or because the substring test would be ambiguous.
_EXPLICIT_MATCHES = {
    ('Carlton', 'Carlton-Parkville'),
    ('Keilor', 'Keilor'),
    ('Pascoe Vale', 'Pascoe Vale-Coburg North'),
    ('Bendigo Surrounds - North', 'North Bendigo'),
    ('East Bendigo - Kennington', 'Flora Hill-Bendigo East'),
    ('Brunswick East', 'East Brunswick'),
    ('Brunswick West', 'West Brunswick'),
    ('Carlton North - Princes Hill', 'Carlton North'),
    ('Dandenong - North', 'Dandenong North-Endeavour Hills'),
    ('Dandenong - South', 'Dandenong'),
    ('Gladstone Park - Westmeadows', 'Gladstone Park-Tullamarine'),
    ('Hawthorn East', 'East Hawthorn'),
    ('Highett (West) - Cheltenham', 'Cheltenham'),
    ('Ivanhoe East - Eaglemont', 'Ivanhoe-Ivanhoe East'),
    ('North Geelong - Bell Park', 'North Geelong'),
    ('Research - North Warrandyte', 'Eltham-Research-Montmorency'),
    ('Geelong West - Hamlyn Heights', 'Herne Hill-Geelong West'),
    ('St Kilda - Central', 'St Kilda'),
    ('St Kilda - West', 'Albert Park-Middle Park-West St Kilda'),
    ('St Kilda East', 'East St Kilda'),
    ('West Footscray - Tottenham', 'West Footscray'),
}

# A label and a suburb that BOTH contain one of these place names are treated
# as referring to the same area.
_SHARED_PLACE_NAMES = (
    'Berwick', 'Bundoora', 'Clayton', 'Craigieburn', 'Croydon',
    'Doncaster East', 'Endeavour Hills', 'Epping', 'Essendon',
    'Ferntree Gully', 'Glen Waverley', 'Glenroy', 'Hampton Park',
    'Hoppers Crossing', 'Kew', 'Lalor', 'CBD', 'Mildura', 'Mill Park',
    'Mornington', 'Mount Waverley', 'Narre Warren', 'Noble Park',
    'Northcote', 'Pakenham', 'Preston', 'Reservoir', 'Richmond', 'Rowville',
    'Roxburgh', 'Shepparton', 'South Yarra', 'Southbank', 'Albans', 'Surrey',
    'Traralgon', 'Warrnambool', 'Werribee', 'West Melbourne',
)

# Unmatched labels are only reported when they contain one of these words.
_DIRECTION_WORDS = ('west', 'north', 'south', 'east', 'central')


def _find_matching_suburb(label, suburbs):
    """Return the rental suburb that `label` maps to, or None if nothing matches.

    Matching runs in two passes over `suburbs`:

    1. Hand-written pairs in `_EXPLICIT_MATCHES`. These are checked against
       every suburb before anything else, so a fuzzy hit on an earlier row
       cannot pre-empt them (e.g. label 'Carlton' is a substring of
       'Carlton North', which precedes the intended 'Carlton-Parkville').
    2. Fuzzy rules, first suburb in row order wins: both names contain one of
       `_SHARED_PLACE_NAMES`, or `label` is a substring of the suburb name.

    A label matches some suburb under this function exactly when it matched
    under a single combined pass; only the choice of suburb can differ.
    """
    for suburb in suburbs:
        if (label, suburb) in _EXPLICIT_MATCHES:
            return suburb

    for suburb in suburbs:
        shares_place_name = any(
            place in label and place in suburb for place in _SHARED_PLACE_NAMES
        )
        if shares_place_name or label in suburb:
            return suburb

    return None


# The suburb list does not change inside the loop, so read it once.
rent_suburbs = df_weekly_rent['Suburb'].values

for label in df_population['Label']:
    # Labels listed in missing_suburbs are skipped entirely.
    if label in missing_suburbs:
        continue

    suburb = _find_matching_suburb(label, rent_suburbs)
    found = suburb is not None
    if found:
        # ADD POPULATION DATA HERE!  (`suburb` holds the matched rental suburb)
        continue

    # Diagnostic: print only those unmatched labels that carry a direction word
    # or 'central'; other unmatched labels are dropped silently.
    label_lower = label.lower()
    if any(word in label_lower for word in _DIRECTION_WORDS):
        print(label)