In [1]:
import pandas as pd
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score
from opacus import PrivacyEngine
import torch
from torch.utils.data import DataLoader, TensorDataset
from torch import nn, optim

# Import Dataset

In [2]:
from ucimlrepo import fetch_ucirepo 
  
# fetch dataset 
adult = fetch_ucirepo(id=2) 
  
# data (as pandas dataframes) 
X = adult.data.features 
y = adult.data.targets 


In [3]:
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48842 entries, 0 to 48841
Data columns (total 14 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   age             48842 non-null  int64 
 1   workclass       47879 non-null  object
 2   fnlwgt          48842 non-null  int64 
 3   education       48842 non-null  object
 4   education-num   48842 non-null  int64 
 5   marital-status  48842 non-null  object
 6   occupation      47876 non-null  object
 7   relationship    48842 non-null  object
 8   race            48842 non-null  object
 9   sex             48842 non-null  object
 10  capital-gain    48842 non-null  int64 
 11  capital-loss    48842 non-null  int64 
 12  hours-per-week  48842 non-null  int64 
 13  native-country  48568 non-null  object
dtypes: int64(6), object(8)
memory usage: 5.2+ MB


In [4]:
X.head()

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country
0,39,State-gov,77516,Bachelors,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,40,United-States
1,50,Self-emp-not-inc,83311,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States
2,38,Private,215646,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States
3,53,Private,234721,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States
4,28,Private,338409,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba


# Data Cleaning 

## BASELINE MODEL (No DP)

In [5]:
y = y.iloc[:, 0]
# Strip spaces and remove periods
y = y.str.strip().str.replace('.', '', regex=False)

# Map both <=50K and <50K to 0; >=50K and >50K to 1
y = y.replace({
    '<=50K': 0,
    '<50K': 0,
    '>=50K': 1,
    '>50K': 1
})

categorical_cols = X.select_dtypes(include=['object']).columns.tolist()
numerical_cols = X.select_dtypes(exclude=['object']).columns.tolist()

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_cols),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
    ]
)

clf = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression(max_iter=1000))
])

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
clf.fit(X_train, y_train)

# Predict and evaluate
y_pred = clf.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.8510594738458389

Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.94      0.91      7414
           1       0.74      0.59      0.65      2355

    accuracy                           0.85      9769
   macro avg       0.81      0.76      0.78      9769
weighted avg       0.84      0.85      0.84      9769



In [6]:
def fairness_metrics(X_test, y_true, y_pred, feature):
    df_eval = X_test.copy()
    df_eval['y_true'] = y_true
    df_eval['y_pred'] = y_pred
    
    groups = df_eval[feature].unique()
    metrics = []

    for g in groups:
        mask = df_eval[feature] == g
        y_t = df_eval.loc[mask, 'y_true']
        y_p = df_eval.loc[mask, 'y_pred']

        # Positive prediction rate (Demographic Parity)
        pos_rate = np.mean(y_p)

        # True Positive Rate (Equal Opportunity)
        tp = np.sum((y_p == 1) & (y_t == 1))
        fn = np.sum((y_p == 0) & (y_t == 1))
        tpr = tp / (tp + fn + 1e-10)

        # False Positive Rate (Equalized Odds)
        fp = np.sum((y_p == 1) & (y_t == 0))
        tn = np.sum((y_p == 0) & (y_t == 0))
        fpr = fp / (fp + tn + 1e-10)

        # Accuracy
        acc = np.mean(y_p == y_t)

        metrics.append({
            'group': g,
            'positive_rate': pos_rate,
            'TPR': tpr,
            'FPR': fpr,
            'accuracy': acc
        })

    df_metrics = pd.DataFrame(metrics)
    print(f"\n=== Fairness by {feature} ===")
    print(df_metrics)

    # Group gaps
    max_min_gap = df_metrics[['positive_rate', 'TPR', 'FPR', 'accuracy']].max() - df_metrics[['positive_rate', 'TPR', 'FPR', 'accuracy']].min()
    print("\nGaps between groups:")
    print(max_min_gap)

    return df_metrics

In [7]:
def evaluate_model_full(name, model, X_model_input, y_true, X_sensitive, fairness_metrics_func=fairness_metrics):
    """
    Evaluates a model (PyTorch DP/non-DP or sklearn) with standard metrics and fairness metrics.

    Args:
        name (str): Model name (for printing)
        model: sklearn model or PyTorch nn.Module
        X_model_input: np.ndarray or torch.Tensor input for the model
        y_true: array-like, true labels
        X_sensitive: pd.DataFrame with sensitive features (e.g., 'sex', 'race') for fairness
        fairness_metrics_func: function to compute fairness metrics
    """

    # Detect if PyTorch model
    is_torch_model = isinstance(model, torch.nn.Module)

    if is_torch_model:
        # Ensure tensor input
        if not torch.is_tensor(X_model_input):
            X_model_input = torch.tensor(X_model_input, dtype=torch.float32)
        model.eval()
        with torch.no_grad():
            outputs = model(X_model_input)
            probs = torch.softmax(outputs, dim=1)[:, 1].cpu().numpy()
            y_pred = np.argmax(outputs.cpu().numpy(), axis=1)
    else:
        # sklearn model
        y_pred = model.predict(X_model_input)
        if hasattr(model, "predict_proba"):
            probs = model.predict_proba(X_model_input)[:, 1]
        else:
            probs = y_pred  # fallback if predict_proba not available

    # Standard metrics
    acc = accuracy_score(y_true, y_pred)
    try:
        auc = roc_auc_score(y_true, probs)
    except ValueError:
        auc = float('nan')

    print(f"\n=== Evaluation for {name} ===")
    print(f"Accuracy: {acc:.3f}")
    print(f"AUC: {auc:.3f}")

    # Fairness metrics
    sex_metrics = race_metrics = None
    if fairness_metrics_func and X_sensitive is not None:
        if 'sex' in X_sensitive.columns:
            sex_metrics = fairness_metrics_func(X_sensitive, y_true, y_pred, 'sex')
        if 'race' in X_sensitive.columns:
            race_metrics = fairness_metrics_func(X_sensitive, y_true, y_pred, 'race')

    return {
        "accuracy": acc,
        "auc": auc,
        "sex_metrics": sex_metrics,
        "race_metrics": race_metrics
    }

In [8]:
# For non-DP model
results_no_dp = evaluate_model_full(
    name="No DP Logistic Regression",
    model=clf,                   # sklearn or PyTorch model
    X_model_input=X_test_tensor if isinstance(clf, torch.nn.Module) else X_test, 
    y_true=y_test,
    X_sensitive=X_test           # original DataFrame with sensitive features
)


=== Evaluation for No DP Logistic Regression ===
Accuracy: 0.851
AUC: 0.906

=== Fairness by sex ===
    group  positive_rate       TPR       FPR  accuracy
0    Male       0.248623  0.600503  0.094589  0.812576
1  Female       0.072688  0.506849  0.017434  0.928859

Gaps between groups:
positive_rate    0.175935
TPR              0.093653
FPR              0.077155
accuracy         0.116282
dtype: float64

=== Fairness by race ===
                group  positive_rate       TPR       FPR  accuracy
0               White       0.203071  0.594747  0.068493  0.845388
1               Black       0.077813  0.421488  0.027711  0.902208
2  Amer-Indian-Eskimo       0.052083  0.375000  0.022727  0.927083
3  Asian-Pac-Islander       0.264151  0.651163  0.120690  0.817610
4               Other       0.059701  0.250000  0.033898  0.880597

Gaps between groups:
positive_rate    0.212068
TPR              0.401163
FPR              0.097962
accuracy         0.109473
dtype: float64


- The model predicts that men are more likely to earn over $50K than women, which could reflect real income patterns but could also show bias.
- It’s better at correctly identifying men who earn over $50K than women who do.
- The model falsely labels men as rich more often than women.
- Interestingly, it’s more accurate overall for women, possibly because fewer women are predicted as high earners, reducing some errors.

## DP-SGD

In [27]:
# Preprocess features with your existing preprocessor
X_train_processed = preprocessor.fit_transform(X_train)
X_test_processed = preprocessor.transform(X_test)

# Convert to torch tensors (handle sparse output from OneHotEncoder if any)
def to_tensor(x):
    if hasattr(x, "toarray"):
        return torch.tensor(x.toarray(), dtype=torch.float32)
    return torch.tensor(x, dtype=torch.float32)

X_train_tensor = to_tensor(X_train_processed)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
X_test_tensor = to_tensor(X_test_processed)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)

# Create DataLoader
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Define logistic regression model
class LogisticRegressionModel(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.linear = nn.Linear(input_dim, 2)

    def forward(self, x):
        return self.linear(x)

model = LogisticRegressionModel(X_train_tensor.shape[1])

# Optimizer and loss
optimizer = optim.SGD(model.parameters(), lr=0.1) # we can tune the learning rate (lr)
criterion = nn.CrossEntropyLoss()

# Setup Privacy Engine for DP-SGD
privacy_engine = PrivacyEngine()

model, optimizer, train_loader = privacy_engine.make_private(
    module=model,
    optimizer=optimizer,
    data_loader=train_loader,
    noise_multiplier=5.0,  # we can tune this value (higher means high privacy)
    max_grad_norm=0.5, # we can tune this value (lower means high privacy)
)

# Training loop
model.train()
for epoch in range(10):
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1} done")

# Evaluation
model.eval()
with torch.no_grad():
    outputs = model(X_test_tensor)
    preds = outputs.argmax(dim=1).numpy()
    acc = accuracy_score(y_test, preds)
    print(f"DP-SGD Logistic Regression Test Accuracy: {acc:.6f}")

epsilon_dpsgd = privacy_engine.get_epsilon(delta=1e-5)
print(f"Achieved epsilon (for DP-SGD): {epsilon_dpsgd:.6f}")



Epoch 1 done
Epoch 2 done
Epoch 3 done
Epoch 4 done
Epoch 5 done
Epoch 6 done
Epoch 7 done
Epoch 8 done
Epoch 9 done
Epoch 10 done
DP-SGD Logistic Regression Test Accuracy: 0.841744
Achieved epsilon (for DP-SGD): 0.088116




In [28]:
results_dp = evaluate_model_full(
    name="DP-SGD Logistic Regression",
    model=model,                 # PyTorch DP model
    X_model_input=X_test_tensor, # tensor for model input
    y_true=y_test,
    X_sensitive=X_test           # original DataFrame for fairness
)


=== Evaluation for DP-SGD Logistic Regression ===
Accuracy: 0.842
AUC: 0.892

=== Fairness by sex ===
    group  positive_rate       TPR       FPR  accuracy
0    Male       0.229957  0.555276  0.087549  0.803703
1  Female       0.057532  0.394521  0.014644  0.918651

Gaps between groups:
positive_rate    0.172425
TPR              0.160756
FPR              0.072905
accuracy         0.114949
dtype: float64

=== Fairness by race ===
                group  positive_rate       TPR       FPR  accuracy
0               White       0.181120  0.533771  0.059952  0.836152
1               Black       0.068349  0.371901  0.024096  0.899054
2  Amer-Indian-Eskimo       0.072917  0.375000  0.045455  0.906250
3  Asian-Pac-Islander       0.317610  0.686047  0.181034  0.783019
4               Other       0.089552  0.500000  0.033898  0.910448

Gaps between groups:
positive_rate    0.249261
TPR              0.314146
FPR              0.156938
accuracy         0.127429
dtype: float64


# Selective DP-SGD

In [29]:
# DP hyperparameters (same as DP config above)
noise_multiplier = 5.0
max_grad_norm = 0.5
delta = 1e-5

model1 = LogisticRegressionModel(X_train_tensor.shape[1])
optimizer = optim.SGD(model1.parameters(), lr=0.1)
model1.train() 

# Attach PrivacyEngine
privacy_engine = PrivacyEngine()
model1, optimizer, train_loader = privacy_engine.make_private(
    module=model1,
    optimizer=optimizer,
    data_loader=train_loader,
    noise_multiplier=noise_multiplier,
    max_grad_norm=max_grad_norm,
)

# Identify sensitive feature indices
sensitive_features = ['sex', 'race']
sensitive_indices = []

# Extract encoded feature names
encoded_feature_names = preprocessor.get_feature_names_out()
for idx, name in enumerate(encoded_feature_names):
    for sf in sensitive_features:
        if sf in name:
            sensitive_indices.append(idx)

print("Sensitive indices:", sensitive_indices)

# Training loop (Selective DP)
for epoch in range(10):
    for data, target in train_loader:
        optimizer.zero_grad()
        outputs = model1(data)
        loss = criterion(outputs, target)
        loss.backward()

        # Selective noise addition
        with torch.no_grad():
            for name, param in model1.named_parameters():
                if "linear.weight" in name:
                    grad = param.grad
                    mask = torch.zeros_like(grad)
                    mask[:, sensitive_indices] = 1.0  # sensitive features only

                    # Add Gaussian noise only to masked gradients
                    noise = torch.normal(
                        mean=0,
                        std=noise_multiplier * max_grad_norm,
                        size=grad.shape,
                        device=grad.device
                    )
                    grad.add_(noise * mask)

        optimizer.step()

    print(f"Epoch {epoch+1} done")

# Evaluate
model1.eval()
with torch.no_grad():
    outputs = model1(X_test_tensor)
    preds = outputs.argmax(dim=1).numpy()
    acc = accuracy_score(y_test, preds)
    print(f"Selective DP-SGD Accuracy: {acc:.6f}")

epsilon = privacy_engine.get_epsilon(delta)
print(f"Achieved epsilon (for Selective DP-SGD): {epsilon:.6f}")



Sensitive indices: [61, 62, 63, 64, 65, 66, 67]
Epoch 1 done
Epoch 2 done
Epoch 3 done
Epoch 4 done
Epoch 5 done
Epoch 6 done
Epoch 7 done
Epoch 8 done
Epoch 9 done
Epoch 10 done
Selective DP-SGD Accuracy: 0.842973
Achieved epsilon (for Selective DP-SGD): 0.088116




In [30]:
results_selective_dp = evaluate_model_full(
    name="Selective DP-SGD Logistic Regression",
    model=model1,                  
    X_model_input=X_test_tensor, 
    y_true=y_test,
    X_sensitive=X_test         
)


=== Evaluation for Selective DP-SGD Logistic Regression ===
Accuracy: 0.843
AUC: 0.893

=== Fairness by sex ===
    group  positive_rate       TPR       FPR  accuracy
0    Male       0.249235  0.588442  0.100748  0.804621
1  Female       0.065574  0.438356  0.018131  0.920507

Gaps between groups:
positive_rate    0.183661
TPR              0.150086
FPR              0.082617
accuracy         0.115887
dtype: float64

=== Fairness by race ===
                group  positive_rate       TPR       FPR  accuracy
0               White       0.195754  0.563790  0.069299  0.836872
1               Black       0.097792  0.504132  0.038554  0.903260
2  Amer-Indian-Eskimo       0.072917  0.375000  0.045455  0.906250
3  Asian-Pac-Islander       0.314465  0.697674  0.172414  0.792453
4               Other       0.134328  0.625000  0.067797  0.895522

Gaps between groups:
positive_rate    0.241549
TPR              0.322674
FPR              0.133860
accuracy         0.113797
dtype: float64


# Explore Privacy-Utility/Fairness Trade-Off

Hi, I was thinking about exploring how different privacy levels affect model performance by fine-tuning several parameters.

There are several ways we can approach this, such as creating a comparison table between regular DP-SGD and selective DP-SGD, or generating plots that show how these parameters impact accuracy and fairness. The setup will involve training both models under the same configurations and then comparing their utility (e.g., accuracy or AUC score) and fairness (e.g., how much accuracy decreases across gender or race groups).

The parameters we can vary are:
- noise_multiplier (higher values → stronger privacy)
- max_grad_norm (lower values → stronger privacy)

We’ll focus on these two parameters since they are the most directly related to differential privacy.

Constant parameters (kept fixed):
- lr (learning rate)
- number of epochs
- delta (a tunable parameter used in calculating epsilon, the privacy budget. It doesn’t directly affect accuracy, so we don’t need to focus on it.)

We could write a function that takes as input lists or ranges of noise_multiplier and max_grad_norm, along with both models (standard DP-SGD and selective DP-SGD) and the baseline non-private model (referred to as clf). The function would store various results for comparison, including:
- Overall accuracy
- Accuracy by group (e.g., Male, Female, White, Black, etc.)
- Drop in accuracy per group (relative to the baseline non-private model, to analyze which groups are more affected by privacy)
- Fairness gaps across gender and race
- Finally, we can summarize these results in a comparison table and generate visualizations (e.g., plots) to illustrate the effects of different privacy settings.