In [36]:
import numpy as np
import pandas as pd
import itertools

In [37]:
# Read the curated rent table (relative path, resolved from the notebook's working directory)
df = pd.read_csv(filepath_or_buffer='../data/curated/rent.csv')

In [38]:
# Distinct values of the rent table's postcode column, in order of first appearance
postcodes = df.loc[:, 'postcode'].unique()

# Years to generate rows for: 2025, 2026, 2027 (np.arange excludes the stop value)
years = np.arange(2025, 2027 + 1)

# The Apartment flag takes the two values 0 and 1
apartments = list(range(2))

# Empty container that will collect the generated rows
rows = list()


In [39]:
# Number of distinct postcodes found in the rent data
len(postcodes)

175

In [40]:
# Build the grid of rows: every combination of postcode, year and apartment
# flag, expanded by the bedroom counts allowed for that apartment flag.
# `rows` is reset here, inside the same cell that fills it, so re-running this
# cell rebuilds the grid instead of appending a second copy to a list left
# over from an earlier run.
rows = []

for postcode, year, apartment in itertools.product(postcodes, years, apartments):
    # Apartment == 1 -> Bed is 1 or 2; otherwise (Apartment == 0) -> Bed is 2, 3 or 4
    bed_values = [1, 2] if apartment == 1 else [2, 3, 4]

    # One row per allowed bedroom count for this postcode / year / apartment
    for bed in bed_values:
        rows.append([postcode, year, bed, apartment])

# Assemble the collected rows into a DataFrame
new_df = pd.DataFrame(rows, columns=['postcode', 'Year', 'Bed', 'Apartment'])

# Each combination must appear exactly once; a duplicate means stale rows crept in
assert not new_df.duplicated().any(), "duplicate rows in the generated grid"

# Display the first few rows of the new DataFrame
new_df.head(3)

Unnamed: 0,postcode,Year,Bed,Apartment
0,3206,2025,2,0
1,3206,2025,3,0
2,3206,2025,4,0


In [41]:
# Read the population table and bring it into long form:
# one row per (postcode, Year) with the value held in a 'population' column
population = (
    pd.read_csv('../data/landing/population_percent_change_2000-27.csv')
    # align the key column's name with the one used in new_df
    .rename(columns={'Postcode': 'postcode'})
    # every column other than 'postcode' becomes a value of 'Year'
    .melt(id_vars=['postcode'], var_name='Year', value_name='population')
    # column headers read from the CSV arrive as strings; cast so they match new_df's Year
    .astype({'Year': int})
)

# Inner join: only grid rows with a matching (postcode, Year) in `population` are kept
test = new_df.merge(population, how='inner', on=['postcode', 'Year'])
test.head(5)

Unnamed: 0,postcode,Year,Bed,Apartment,population
0,3206,2025,2,0,0.015159
1,3206,2025,3,0,0.015159
2,3206,2025,4,0,0.015159
3,3206,2025,1,1,0.015159
4,3206,2025,2,1,0.015159


In [42]:
# Read the income table and bring it into long form:
# one row per (postcode, Year) with the value held in an 'income' column
income = (
    pd.read_csv('../data/landing/income_2000-27.csv')
    # align the key column's name with the one used in `test`
    .rename(columns={'Postcode': 'postcode'})
    # every column other than 'postcode' becomes a value of 'Year'
    .melt(id_vars=['postcode'], var_name='Year', value_name='income')
    # column headers read from the CSV arrive as strings; cast so they match test's Year
    .astype({'Year': int})
)

# Inner join: only rows with a matching (postcode, Year) in `income` are kept
test = test.merge(income, how='inner', on=['postcode', 'Year'])
test.head(5)

Unnamed: 0,postcode,Year,Bed,Apartment,population,income
0,3206,2025,2,0,0.015159,154278
1,3206,2025,3,0,0.015159,154278
2,3206,2025,4,0,0.015159,154278
3,3206,2025,1,1,0.015159,154278
4,3206,2025,2,1,0.015159,154278


In [43]:
# Load the lookup tables that are joined onto the grid by postcode
school_recent = pd.read_csv('../data/landing/schoolscount2023.csv')
transportation = pd.read_csv('../data/landing/transportation.csv')
# The parks file names its key column 'POSTCODE'; rename it to match the other tables
park = pd.read_csv('../data/landing/parks.csv').rename(columns={'POSTCODE': 'postcode'})

# Left-join each lookup onto the grid so that every grid row is kept.
# validate='many_to_one' makes pandas raise a MergeError if a lookup table lists
# a postcode more than once, instead of silently duplicating grid rows.
# NOTE: `test` is reassigned here, so re-running this cell without re-running
# the cells that build `test` would join the same columns a second time.
test = test.merge(school_recent, on='postcode', how='left', validate='many_to_one')
test = test.merge(transportation, on='postcode', how='left', validate='many_to_one')
test = test.merge(park, on='postcode', how='left', validate='many_to_one')

# Postcodes missing from a lookup table come out of the left joins as NaN;
# replace them with 0. fillna applies to every column of `test`, not only
# the joined ones. Assignment is used instead of inplace=True.
test = test.fillna(0)

In [45]:
# Display the assembled `test` DataFrame
test

Unnamed: 0,postcode,Year,Bed,Apartment,population,income,Camp,Language,Primary,Secondary,Special,school_total,tram_count,bus_count,train_count,park_count,mean_park_area
0,3206,2025,2,0,0.015159,154278,0.0,0.0,3.0,1.0,0.0,4.0,33,19,0,2.0,109.9035
1,3206,2025,3,0,0.015159,154278,0.0,0.0,3.0,1.0,0.0,4.0,33,19,0,2.0,109.9035
2,3206,2025,4,0,0.015159,154278,0.0,0.0,3.0,1.0,0.0,4.0,33,19,0,2.0,109.9035
3,3206,2025,1,1,0.015159,154278,0.0,0.0,3.0,1.0,0.0,4.0,33,19,0,2.0,109.9035
4,3206,2025,2,1,0.015159,154278,0.0,0.0,3.0,1.0,0.0,4.0,33,19,0,2.0,109.9035
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2620,3750,2027,2,0,0.292229,80949,0.0,0.0,4.0,2.0,0.0,6.0,0,24,0,2.0,817.0855
2621,3750,2027,3,0,0.292229,80949,0.0,0.0,4.0,2.0,0.0,6.0,0,24,0,2.0,817.0855
2622,3750,2027,4,0,0.292229,80949,0.0,0.0,4.0,2.0,0.0,6.0,0,24,0,2.0,817.0855
2623,3750,2027,1,1,0.292229,80949,0.0,0.0,4.0,2.0,0.0,6.0,0,24,0,2.0,817.0855


In [44]:
# Destination of the assembled table, inside the curated data folder
file_path = '../data/curated/test.csv'

# Write the table as CSV, leaving out the DataFrame's row index
test.to_csv(path_or_buf=file_path, index=False)