In [41]:
import random
import pandas as pd

# Define possible housing types
housing_types = ["1&2-Room Flat", "3-Room Flat", "4-Room Flat", "5-Room", "Executive Flat", "Condominium", "Apartment", "Landed Property"]

# Initialize list to store generated data
data = []

for _ in range(5000):
    # Generate age (at least 25 years old)
    age = random.randint(25, 70)
    
    # Select a random housing type
    housingtype = random.choice(housing_types)
    
    # Generate yearly income (in thousands)
    yearly_income = random.randint(24000, 200000) 
    
    # Generate CPF balance
    cpf_balance = random.randint(20, 50) * 10000 if (age > 40) else random.randint(1, 20) * 10000
    
    # Generate yearly expenditure (less than yearly income)
    yearly_expenditure = random.randint(10, yearly_income // 1000) * 1000
    
    # Calculate savings
    savings = yearly_income - yearly_expenditure + random.randint(10000,500000) if age > 40 else random.randint(500, 10000)
    
    # Calculate quality of life (based on the given rule)
    quality_of_life = random.randint(1, 4) if (yearly_income < 50000 and yearly_expenditure > 0.5 * yearly_income) else random.randint(5, 10)
    
    # Calculate disaster preparedness (6 months of income for a rainy day)
    disaster_preparedness = random.randint(6, 10) if savings >= 0.5 * yearly_income else random.randint(1, 5)
    
    # Calculate retirement readiness (assuming retirement age of 65)
    years_until_retirement = 65 - age
    required_savings_for_retirement = (random.randint(20, 50) * 1000) * (83 - 65)  # Assuming life expectancy of 83 years
    retirement_readiness = random.randint(6, 10) if savings >= required_savings_for_retirement else random.randint(1, 5)
    
    # Append generated data to the list
    data.append([age, housingtype, yearly_income, cpf_balance, yearly_expenditure, savings, quality_of_life, disaster_preparedness, retirement_readiness])

# Create a DataFrame from the generated data
columns = ['age', 'housingtype', 'yearly income', 'cpf balance', 'yearly expenditure', 'savings', 'quality of life', 'disaster preparedness', 'retirement readiness']
df = pd.DataFrame(data, columns=columns)

# save the mockdata to a csv
df.to_csv("../data/mockdata.csv", index=False)

In [92]:
import random
import pandas as pd

# Define possible housing types
housing_types = ["1&2-Room Flat", "3-Room Flat", "4-Room Flat", "5-Room", "Executive Flat", "Condominium", "Apartment", "Landed Property"]

# Helper function to calculate CPF balance
def calculate_cpf_balance(age, yearly_income):
    total_cpf = 0
    if age <= 55:
        total_cpf = yearly_income * 0.37 * (age - 19)
    else:
        total_cpf = yearly_income * 0.37 * 35 + yearly_income * 0.22 * (age - 55)
    return total_cpf

# Helper function to calculate savings
def calculate_savings(yearly_income, age):
    return yearly_income * 0.20 * (age - 20) * 0.5

# Helper function to generate a score around a base value
def generate_score(base, variance=3):
    return max(1, min(10, base + random.randint(-variance, variance)))

# Initialize list to store generated data
data = []

for _ in range(5000):
    # Generate age (between 20 and 65 years old)
    age = random.randint(20, 65)
    
    # Select a random housing type with conditions for younger people
    if age < 30:
        housingtype = random.choice(housing_types[:5])  # More likely to live in smaller flats
    else:
        housingtype = random.choice(housing_types)
    
    # Generate yearly income with a trend based on age
    if age <= 25:
        yearly_income = random.randint(30, 60) * 1000
    elif age <= 40:
        yearly_income = random.randint(40, 80) * 1000
    else:
        yearly_income = random.randint(50, 150) * 1000
    
    # Calculate CPF balance
    cpf_balance = calculate_cpf_balance(age, yearly_income)
    
    # Generate yearly expenditure based on the given conditions
    if age < 30:
        yearly_expenditure = random.randint(5, 20) * 1000  # Lower expenses for young adults
    else:
        monthly_expenses = 2500 + 500 + 1000  # Rent + groceries + car mortgage
        other_expenses = random.randint(1, 5) * 1000  # Other monthly expenses
        yearly_expenditure = (monthly_expenses + other_expenses) * 12
    
    # Calculate savings
    savings = calculate_savings(yearly_income, age)
    
    # Calculate base score to be used for quality of life, disaster preparedness, and retirement readiness
    if yearly_income < 50000 or yearly_expenditure > yearly_income:
        base_score = random.randint(1, 4)
    else:
        base_score = random.randint(7, 10)
    
    # Calculate quality of life, disaster preparedness, and retirement readiness around the base score
    quality_of_life = generate_score(base_score)
    disaster_preparedness = generate_score(base_score)
    retirement_readiness = generate_score(base_score)
    
    # Append generated data to the list
    data.append([age, housingtype, yearly_income, cpf_balance, yearly_expenditure, savings, quality_of_life, disaster_preparedness, retirement_readiness])

# Create a DataFrame from the generated data
columns = ['age', 'housingtype', 'yearly income', 'cpf balance', 'yearly expenditure', 'savings', 'quality of life', 'disaster preparedness', 'retirement readiness']
df = pd.DataFrame(data, columns=columns)

# save the mockdata to a csv
df.to_csv("../data/mockdata.csv", index=False)
df

Unnamed: 0,age,housingtype,yearly income,cpf balance,yearly expenditure,savings,quality of life,disaster preparedness,retirement readiness
0,51,1&2-Room Flat,52000,615680.0,72000,161200.0,6,3,5
1,52,1&2-Room Flat,103000,1257630.0,96000,329600.0,6,10,5
2,33,4-Room Flat,52000,269360.0,84000,67600.0,4,1,3
3,28,4-Room Flat,62000,206460.0,16000,49600.0,9,10,10
4,20,3-Room Flat,56000,20720.0,11000,0.0,10,9,9
...,...,...,...,...,...,...,...,...,...
9995,65,5-Room,140000,2121000.0,96000,630000.0,8,10,10
9996,61,Apartment,135000,1926450.0,84000,553500.0,10,10,10
9997,62,Executive Flat,111000,1608390.0,96000,466200.0,6,7,7
9998,55,3-Room Flat,113000,1505160.0,96000,395500.0,6,9,10
