# In [17]:
import os
import sys

# Make modules that live next to this script importable. This has to run
# BEFORE the project-local imports further down; placed after them it cannot
# influence whether they resolve.
try:
    _local_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    # __file__ is not defined in this execution context (e.g. an interactive
    # session), so fall back to the current working directory.
    _local_dir = os.getcwd()
# Re-running this code must not keep growing sys.path with duplicates.
if _local_dir not in sys.path:
    sys.path.append(_local_dir)

import numpy as np
import pandas as pd
from fairlearn.datasets import fetch_adult
from scipy.spatial.distance import cdist
from sklearn.preprocessing import OrdinalEncoder

# Project-local modules (resolved through the sys.path entry added above).
from cplex_fair_assignment_lp_solver_util import fair_partial_assignment_util
from util.clusteringutil import vanilla_clustering

# ==== Step 1: Load and preprocess a small subset of the Adult dataset ====

data = fetch_adult(as_frame=True)
# Work on a copy so the fetched frame itself is never modified.
df_full = data.data.copy()  # Already includes 'sex' column

# Only use Male/Female rows and drop rows with any missing value
filtered = df_full[df_full['sex'].isin(['Male', 'Female'])].dropna()
print("Filtered adult dataset size:", len(filtered))

# Take a small subset (at most 100 rows); the fixed seed keeps it reproducible
n_samples = min(100, len(filtered))
df_small = filtered.sample(n=n_samples, random_state=0)

# Ordinal-encode every column except 'sex', then standardize each column
# to zero mean and unit standard deviation.
encoder = OrdinalEncoder()
X_encoded = encoder.fit_transform(df_small.drop(columns=['sex']))
feature_mean = X_encoded.mean(axis=0)
feature_std = X_encoded.std(axis=0)
# A column that is constant within this small sample has std == 0; dividing by
# it would yield 0/0 = NaN and poison the clustering input. Using 1.0 instead
# leaves such a column as all zeros (its centered value).
safe_std = np.where(feature_std == 0, 1.0, feature_std)
X_normalized = (X_encoded - feature_mean) / safe_std

# Color flag: Female=1, Male=0 (one entry per row of df_small, same order)
color_flag = (df_small['sex'] == 'Female').astype(int).tolist()

# ==== Step 2: Choose centers using FCBC's vanilla_clustering ====
k = 5  # number of clusters
# vanilla_clustering hands back three values; only the last one (the centers)
# is needed here, the first two are discarded.
clustering_triple = vanilla_clustering(
    X_normalized,
    k,
    clustering_method="kmeans",
)
_, _, centers = clustering_triple

# ==== Step 3: Compute distance matrix ====
# Squared Euclidean distance between every sample in X_normalized and every
# center.
distance_matrix = cdist(
    X_normalized,
    centers,
    metric='sqeuclidean',
)

# ==== Step 4: Run FCBC LP + Rounding pipeline ====
# NOTE(review): the error output recorded with this cell complains about a
# `cluster_centers` keyword, which this call does not pass -- the recorded
# output looks stale. Re-run and confirm against the actual signature of
# fair_partial_assignment_util.

# Inputs computed in the earlier steps.
pipeline_inputs = {
    "df": X_normalized,
    "centers": centers,
    "initial_score": distance_matrix,
    "color_flag": color_flag,
}

# Fixed settings for this run. The keyword spelling `color_proprtions` is
# kept exactly as written; presumably it matches the callee -- verify.
pipeline_settings = {
    "delta": 0.05,
    "color_proprtions": None,
    "alpha": 1,
    "beta": 1,
    "clustering_method": "kmeans",
    "num_colors": 2,
    "L": None,
    "epsilon": 1e-5,
    "alpha_POF": 1,
}

result = fair_partial_assignment_util(**pipeline_inputs, **pipeline_settings)

# ==== Step 5: Print final result summary ====
print("\n✅ FCBC pipeline completed on Adult subset\n")

# (label, key into `result`) pairs, printed in this order.
summary_rows = (
    ("Final objective value:", "objective"),
    ("Rounded color proportions (normalized):", "proportions_normalized"),
    ("LP color proportions (normalized):", "partial_proportions_normalized"),
)
for label, key in summary_rows:
    print(label, result[key])

# Optional: Inspect assignments
# The flat assignment vector is viewed as rows of k entries (one column per
# cluster); summing down the columns gives a per-cluster total.
assignments = np.array(result["assignment"]).reshape((-1, k))
print("\nCluster sizes:", assignments.sum(axis=0))


# --- Recorded cell output ---
# Filtered adult dataset size: 45222
#
# TypeError: fair_partial_assignment_util() got an unexpected keyword argument 'cluster_centers'