# In [34]:
import pandas as pd
import random

# Value pools sampled by the record generator below.
# First names to draw from; differently-cased spellings of the same name are
# separate entries, so each one is an independent candidate for random.choice.
names = [
    "Ali", "Fatimah", "Omar", "Aisha", "Abdullah", "Lama", "Reem", "Hassan",
    "Sara", "Mohammed", "Noura", "Khalid", "Huda", "Salman", "Ahmed", "Yousef",
    "Mona", "Rawan", "Fahad", "Layla", "Zainab", "Amal", "Rania", "Tariq",
    "Mariam", "Ibrahim", "Samira", "Najla", "Faisal", "Dina", "Malik", "Rami",
    "Bushra", "Khadijah", "Yara", "Jamal", "Saif", "Nada", "Asma", "Hind",
    "Karim", "Naif", "Anas", "Nawaf", "Saad", "Saud", "khalid", "saad",
    "saif", "ziyad", "ahmed", "sarah", "naser", "Bader", "bader", "Bander",
    "bander", "Sultan", "sultan", "sami", "Faisl", "faisal", "Mugbil", "majid",
    "abdullah", "Azoz", "Yazan", "Turki", "turki", "mazen", "abdulmajid", "Mishal",
    "osamh", "luay", "rashid", "muhammed", "tariq", "adam", "Mutaz", "Jawad",
    "fares", "feras", "shabab", "salem", "mutlaq", "musab", "abulrhman",
]

# Marital status options
states = [
    "Single",
    "Married",
]

# Gender options
sexes = [
    "Male",
    "Female",
]

# Financial goal options
goals = [
    "Savings",
    "Investment",
]

# Employment status options
employment_statuses = [
    "Employed",
    "Unemployed",
    "Student",
]

# Budgeting rule labels, written as Spending/Saving/Investment percentages
budgeting_rules = [
    "50/30/20",
    "70/20/10",
    "60/20/20",
]

# Function to select the best budgeting rule based on individual data
def select_budgeting_rule(goal, salary, monthly_debt, elementary_expenses, *, primary_probability=0.50):
    """Return a budgeting-rule label ("spending/saving/investment") for one person.

    Selection steps:
      1. If debt plus elementary expenses exceed the salary, return "50/30/20".
      2. Otherwise pick a "primary" rule: the one with the largest share for the
         category matching ``goal`` ("Savings" -> saving, "Investment" ->
         investment, anything else -> spending). Ties are resolved by insertion
         order, so both "Savings" and "Investment" resolve to "50/30/20".
      3. Pick a "secondary" rule uniformly at random from the remaining rules.
      4. Keep the primary rule with probability ``primary_probability``,
         otherwise use the secondary rule.
      5. If the elementary expenses exceed the spending share of the chosen
         rule applied to the salary, return "70/20/10" instead.

    Args:
        goal: Financial goal label, e.g. "Savings" or "Investment".
        salary: Monthly income.
        monthly_debt: Monthly debt payment.
        elementary_expenses: Monthly essential expenses.
        primary_probability: Chance (0..1) of keeping the primary rule.
            Defaults to 0.50, i.e. an even split between primary and secondary.

    Returns:
        One of "50/30/20", "70/20/10" or "60/20/20".

    Raises:
        ValueError: If ``primary_probability`` is outside the range [0, 1].
    """
    if not 0.0 <= primary_probability <= 1.0:
        raise ValueError("primary_probability must be between 0 and 1")

    # Calculate remaining income after debt and expenses
    remaining_income = salary - (monthly_debt + elementary_expenses)
    if remaining_income < 0:
        # Obligations exceed the salary: fall back to the default rule
        return "50/30/20"

    # Define the rules as fractions of the salary
    rules = {
        "50/30/20": {"spending": 0.50, "saving": 0.30, "investment": 0.20},
        "70/20/10": {"spending": 0.70, "saving": 0.20, "investment": 0.10},
        "60/20/20": {"spending": 0.60, "saving": 0.20, "investment": 0.20},
    }

    # Category whose share decides the primary rule for this goal
    if goal == "Savings":
        priority = "saving"
    elif goal == "Investment":
        priority = "investment"
    else:
        priority = "spending"

    # max() returns the first rule (in insertion order) among equal shares
    primary_rule = max(rules, key=lambda rule: rules[rule][priority])
    secondary_rule = random.choice([rule for rule in rules if rule != primary_rule])

    # Keep the primary rule with probability `primary_probability`.
    # The comparison is written as `> 1 - p` so the default (0.50) reproduces
    # the original `random.random() > 0.50` draw exactly for a given seed.
    keep_primary = random.random() > 1.0 - primary_probability
    selected_rule = primary_rule if keep_primary else secondary_rule

    # Ensure the selected rule can accommodate the individual's expenses
    spending_limit = rules[selected_rule]["spending"] * salary
    if elementary_expenses > spending_limit:
        # Expenses exceed the spending share: use the rule with the highest spending share
        return "70/20/10"
    return selected_rule

# Function to generate random data for each column
def generate_record():
    """Build one synthetic survey respondent as a dict of column name -> value.

    Every field is drawn with the ``random`` module from the module-level value
    pools (``names``, ``employment_statuses``, ``states``, ``sexes``, ``goals``);
    the budgeting rule is derived from the drawn values via
    ``select_budgeting_rule``.

    Returns:
        dict: One record with the keys "Name", "Salary (SAR)", "Age", "State",
        "Sex", "Monthly Debt (SAR)", "Elementary Expenses (SAR)", "Goal",
        "Number of Children", "Employment_Status", "Nationality",
        "Budgeting_Rule", "Financial_Comfortability" and "Goal_Progress".
        All categorical values are plain strings.

    Raises:
        ValueError: If the drawn employment status has no salary range defined.
    """
    name = random.choice(names)

    # Generate employment status
    employment_status = random.choice(employment_statuses)

    # Generate age based on employment status
    if employment_status == "Student":
        age = random.randint(18, 30)  # Students are between 18 and 30
    else:
        age = random.randint(18, 60)

    # Generate salary based on age and employment status
    if employment_status == "Employed":
        salary = random.randint(5000, 35000) if age > 30 else random.randint(3000, 20000)
    elif employment_status == "Student":
        salary = random.randint(900, 5000)
    elif employment_status == "Unemployed":
        salary = random.randint(900, 3000)
    else:
        # Fail with a clear message instead of an unbound `salary` further down
        raise ValueError(f"No salary range defined for employment status: {employment_status!r}")

    # Generate marital status
    state = random.choice(states)

    # Generate number of children (only married respondents get any)
    number_of_children = random.randint(0, 5) if state == "Married" else 0

    # Generate gender
    sex = random.choice(sexes)

    # Generate elementary expenses based on marital status and children
    if state == "Married" or number_of_children > 0:
        base_expenses = random.uniform(3000, 12000)
    else:
        base_expenses = random.uniform(400, 5000)

    # Modelling assumption of this dataset: female respondents get a 10-30% uplift
    if sex == "Female":
        elementary_expenses = base_expenses * random.uniform(1.1, 1.3)
    else:
        elementary_expenses = base_expenses

    # Generate monthly debt: between 50 and 30% of the salary
    monthly_debt = round(random.uniform(50, salary * 0.3), 2)

    # Generate goal
    goal = random.choice(goals)

    # Generate nationality (weighted 98:2)
    nationality = random.choices(["Saudi", "Other"], weights=[98, 2], k=1)[0]

    # Generate Financial_Comfortability randomly (weighted 70:30).
    # random.choices returns a list, so take its single element to store a string.
    financial_comfortability = random.choices(["yes", "no"], weights=[70, 30], k=1)[0]

    # Generate Goal_Progress randomly (weighted 80:20)
    goal_progress = random.choices(["yes", "no"], weights=[80, 20], k=1)[0]

    # Determine the budgeting rule from the unrounded expenses
    budgeting_rule = select_budgeting_rule(goal, salary, monthly_debt, elementary_expenses)

    return {
        "Name": name,
        "Salary (SAR)": salary,
        "Age": age,
        "State": state,
        "Sex": sex,
        "Monthly Debt (SAR)": monthly_debt,
        "Elementary Expenses (SAR)": round(elementary_expenses, 2),
        "Goal": goal,
        "Number of Children": number_of_children,
        "Employment_Status": employment_status,
        "Nationality": nationality,
        "Budgeting_Rule": budgeting_rule,
        "Financial_Comfortability": financial_comfortability,
        "Goal_Progress": goal_progress,
    }

# Generate 1176 synthetic records, one generate_record() call per row
data = [
    generate_record()
    for _row_number in range(1176)
]

# Turn the list of record dicts into a DataFrame (one column per dict key)
df = pd.DataFrame(data=data)

# Write the dataset to an Excel workbook without the DataFrame index column
df.to_excel(excel_writer="Survey_ICS487_Project.xlsx", index=False)


