In [42]:
import numpy as np
import pandas as pd
import itertools

In [43]:
# Load the curated rent data.
# NOTE(review): this path starts at '../data' while every other read/write in this
# notebook starts at '../../data' — confirm which root is correct.
rent_csv = '../data/curated/rent.csv'
df = pd.read_csv(rent_csv)

In [44]:
# Distinct postcodes that appear in the rent data
postcodes = pd.unique(df['postcode'])

# Years to generate rows for: 2025, 2026, 2027 (the stop value 2028 is exclusive)
years = np.arange(2025, 2028)

# Months to generate rows for. With a step of 4 this yields 1, 5 and 9 only,
# not every month from 1 to 12.
# NOTE(review): confirm that sampling every fourth month is intended.
months = np.arange(1, 13, 4)

# Apartment flag: 0 or 1
apartments = list(range(2))

# Accumulator for the generated rows
rows = list()


In [45]:
# Expand every (postcode, year, month, apartment) combination into one row per
# bedroom count that applies to that dwelling type:
#   Apartment == 1 -> Bed is 1 or 2
#   Apartment == 0 -> Bed is 2, 3 or 4
# `rows` is built fresh in this cell (not appended to a pre-existing list), so
# re-running the cell cannot duplicate rows in `new_df`.
rows = [
    [postcode, year, month, bed, apartment]
    for postcode, year, month, apartment in itertools.product(postcodes, years, months, apartments)
    for bed in ([1, 2] if apartment == 1 else [2, 3, 4])
]

# Create the DataFrame with the results
new_df = pd.DataFrame(rows, columns=['postcode', 'Year', 'Month', 'Bed', 'Apartment'])

# Display the first few rows of the new DataFrame
new_df.head(3)

Unnamed: 0,postcode,Year,Month,Bed,Apartment
0,3206,2025,1,2,0
1,3206,2025,1,3,0
2,3206,2025,1,4,0


In [46]:
# Read the population table and reshape it from wide to long: 'Postcode' is
# renamed to 'postcode', then every remaining column header becomes a value in
# 'Year' with the matching cell stored under 'population'.
population = (
    pd.read_csv('../../data/landing/population_2000-27.csv')
    .rename(columns={'Postcode': 'postcode'})
    .melt(id_vars=['postcode'], var_name='Year', value_name='population')
)

# Cast 'Year' to int so it can be matched against the integer 'Year' column of
# new_df (the melted header labels arrive as strings).
population = population.assign(Year=population['Year'].astype(int))

# Inner join: only (postcode, Year) pairs present in both frames are kept.
test = new_df.merge(population, how='inner', on=['postcode', 'Year'])
test.head(5)

Unnamed: 0,postcode,Year,Month,Bed,Apartment,population
0,3206,2025,1,2,0,18240
1,3206,2025,1,3,0,18240
2,3206,2025,1,4,0,18240
3,3206,2025,1,1,1,18240
4,3206,2025,1,2,1,18240


In [47]:
# Read the income table and reshape it from wide to long: 'Postcode' is renamed
# to 'postcode', then every remaining column header becomes a value in 'Year'
# with the matching cell stored under 'income'.
income = (
    pd.read_csv('../../data/landing/income_2000-27.csv')
    .rename(columns={'Postcode': 'postcode'})
    .melt(id_vars=['postcode'], var_name='Year', value_name='income')
)

# Cast 'Year' to int (the melted header labels arrive as strings) so the join
# keys line up with the integer 'Year' column of `test`.
income = income.assign(Year=income['Year'].astype(int))

# Inner join onto the frame that already carries population.
# NOTE: this rebinds `test`; re-running this cell without first re-running the
# population cell would merge income a second time.
test = test.merge(income, how='inner', on=['postcode', 'Year'])
test.head(5)

Unnamed: 0,postcode,Year,Month,Bed,Apartment,population,income
0,3206,2025,1,2,0,18240,154278
1,3206,2025,1,3,0,18240,154278
2,3206,2025,1,4,0,18240,154278
3,3206,2025,1,1,1,18240,154278
4,3206,2025,1,2,1,18240,154278


In [48]:
file_path = '../../data/curated/test.csv'

# Save the merged frame `test` (which carries the population and income columns
# added by the merges above) rather than `new_df`, which lacks them.
# The row index is not written to the CSV.
test.to_csv(file_path, index=False)