In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

def load_and_preprocess_data(filepath):
    """Load a census-style CSV and return standardised train/test splits.

    Pipeline, in order:

    1. Read the CSV and drop the ``fnlwgt`` column.
    2. Treat the literal string ``"?"`` as missing and drop every row that
       contains a missing value.
    3. Label-encode every object-dtype column (this includes ``income`` when
       it is stored as text).
    4. Coarsen ``age`` and ``hours.per.week`` into four ordinal buckets
       (a mock of k-anonymity generalisation).
    5. Split 80/20 with a fixed seed, then standardise the features using
       statistics computed on the training split only.

    Args:
        filepath: Path (or any other source accepted by ``pandas.read_csv``)
            of a CSV containing at least the columns ``fnlwgt``, ``age``,
            ``hours.per.week`` and ``income``.

    Returns:
        A 5-tuple ``(X_train, X_test, y_train, y_test, scaler)`` where the
        ``X_*`` arrays are standardised feature matrices, the ``y_*`` arrays
        are integer labels taken from ``income``, and ``scaler`` is the
        fitted ``StandardScaler`` (fitted on ``X_train``).

    Raises:
        KeyError: If one of the required columns is absent from the CSV.
    """
    df = pd.read_csv(filepath)
    df = df.drop(columns=["fnlwgt"])
    df = df.replace("?", np.nan).dropna()

    # Each text column gets its own encoder; the encoders are not kept, so
    # the integer codes cannot be mapped back to the original strings later.
    for col in df.columns:
        if df[col].dtype == "object":
            df[col] = LabelEncoder().fit_transform(df[col])

    # K-Anonymity mock: bin "age" and "hours.per.week".
    # NOTE: pd.cut uses right-closed intervals and yields NaN for values
    # outside the outermost edges (e.g. age > 90 or hours > 100).
    df["age"] = pd.cut(df["age"], bins=[0, 25, 45, 65, 90], labels=[0, 1, 2, 3])
    df["hours.per.week"] = pd.cut(
        df["hours.per.week"], bins=[0, 25, 40, 60, 100], labels=[0, 1, 2, 3]
    )

    X = df.drop("income", axis=1).values.astype(np.float32)
    y = df["income"].values.astype(int)

    # Split BEFORE scaling so that the test rows do not influence the mean
    # and variance used to standardise the training features.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # train_test_split returns a list, so it cannot be concatenated with a
    # tuple; build the result tuple explicitly instead.
    return X_train, X_test, y_train, y_test, scaler
