<a href="https://colab.research.google.com/github/Devaki01/ACM-Responsible-AI/blob/main/ACM_WS_new.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# === System setup ===
!pip install -q torch torchvision opacus scikit-learn numpy matplotlib tqdm

# Clone ML-Doctor (TrustAIRLab fork is more stable)
!git clone https://github.com/TrustAIRLab/ML-Doctor.git

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/254.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━[0m [32m245.8/254.4 kB[0m [31m12.6 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m254.4/254.4 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25hCloning into 'ML-Doctor'...
remote: Enumerating objects: 50, done.[K
remote: Counting objects: 100% (50/50), done.[K
remote: Compressing objects: 100% (39/39), done.[K
remote: Total 50 (delta 18), reused 33 (delta 8), pack-reused 0 (from 0)[K
Receiving objects: 100% (50/50), 27.06 KiB | 3.86 MiB/s, done.
Resolving deltas: 100% (18/18), done.


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms

import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score
from tqdm import tqdm

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
# Later cells move models onto this device with `.to(device)`.
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)

Using device: cpu


In [None]:
# Only conversion to tensors is applied; no normalisation or augmentation.
transform = transforms.Compose([transforms.ToTensor()])

# MNIST train / test splits, downloaded into the current directory when missing.
train_dataset = datasets.MNIST(".", train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(".", train=False, transform=transform, download=True)

# Mini-batches of 128: the training split is reshuffled every epoch,
# the test split keeps its original order.
train_loader = torch.utils.data.DataLoader(train_dataset, 128, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, 128, shuffle=False)

100%|██████████| 9.91M/9.91M [00:00<00:00, 16.6MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 500kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 4.62MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 6.41MB/s]


Other datasets you can swap in for MNIST (all available through `torchvision.datasets`):

1. Fashion-MNIST

Type: Images (clothing)\
Classes: 10\
Size: 60k train / 10k test

2. EMNIST (Digits / Letters)

Type: Handwritten characters\
Classes: 10–62

3. CIFAR-10

Type: Natural images\
Classes: 10\
Size: 50k / 10k

4. CIFAR-100

Type: Natural images\
Classes: 100

5. KMNIST

Type: Handwritten Japanese characters (Kuzushiji)


In [None]:
# Display both dataset objects to check their sizes, splits and transforms.
train_dataset, test_dataset

(Dataset MNIST
     Number of datapoints: 60000
     Root location: .
     Split: Train
     StandardTransform
 Transform: Compose(
                ToTensor()
            ),
 Dataset MNIST
     Number of datapoints: 10000
     Root location: .
     Split: Test
     StandardTransform
 Transform: Compose(
                ToTensor()
            ))

In [None]:
class SimpleMLP(nn.Module):
    """Fully connected classifier with one hidden ReLU layer.

    The defaults reproduce the original MNIST-sized network (784 -> 256 -> 10).
    The sizes are parameters so the same class can be reused for inputs of a
    different size or for a different number of classes.

    Args:
        in_features: Number of values per sample after flattening.
        hidden_units: Width of the hidden layer.
        num_classes: Number of output logits.
    """

    def __init__(self, in_features=28 * 28, hidden_units=256, num_classes=10):
        super().__init__()
        # Layer positions inside the Sequential are unchanged, so state-dict
        # keys stay `net.0.*` and `net.2.*`.
        self.net = nn.Sequential(
            nn.Linear(in_features, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, num_classes)
        )

    def forward(self, x):
        """Flatten every sample in the batch to a vector and return raw logits."""
        return self.net(x.view(x.size(0), -1))

In [None]:
def train_model(model, loader, optimizer, epochs=5):
    """Train `model` in place with cross-entropy loss.

    Args:
        model: Module that maps a batch of inputs to class logits.
        loader: Iterable yielding `(inputs, integer_labels)` batches.
        optimizer: Optimizer already bound to the model's parameters.
        epochs: Number of full passes over `loader`.

    Returns:
        None. The model is updated in place and left in training mode.
    """
    criterion = nn.CrossEntropyLoss()

    # Send each batch to wherever the model lives instead of reading a
    # notebook-level `device` variable: the function then works on its own and
    # cannot disagree with the model's actual placement.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    model.train()
    for _ in range(epochs):
        for x, y in loader:
            if target_device is not None:
                x, y = x.to(target_device), y.to(target_device)
            optimizer.zero_grad()
            loss = criterion(model(x), y)
            loss.backward()
            optimizer.step()

In [None]:
# Non-private reference model: plain Adam with the default number of epochs.
baseline_model = SimpleMLP()
baseline_model = baseline_model.to(device)
optimizer = optim.Adam(params=baseline_model.parameters(), lr=1e-3)

train_model(model=baseline_model, loader=train_loader, optimizer=optimizer)

In [None]:
# Bare reference to the function: this only displays its repr, no training is run here.
train_model

![Classification metrics formulas](https://s3-us-west-2.amazonaws.com/myed-prod/books/1421/docbook/resources/images/Classification%20Metrics%20Formulas.jpg)

In [None]:
def collect_confidence(model, loader):
    """Return the softmax probability the model assigns to each sample's true label.

    Args:
        model: Module that maps a batch of inputs to class logits.
        loader: Iterable yielding `(inputs, integer_labels)` batches.

    Returns:
        1-D NumPy array with one confidence per sample, in the order the loader
        produced them. Empty when the loader yields no batches.

    Side effects:
        Switches the model to evaluation mode and leaves it there.
    """
    model.eval()

    # Follow the model's own device rather than a notebook-level `device`
    # variable, so the helper is self-contained.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    per_batch = []
    with torch.no_grad():
        for x, y in loader:
            if target_device is not None:
                x, y = x.to(target_device), y.to(target_device)
            probs = torch.softmax(model(x), dim=1)
            # For every row, pick the probability in the column of its true class.
            true_class_probs = probs.gather(1, y.long().unsqueeze(1)).squeeze(1)
            per_batch.append(true_class_probs.cpu())

    if not per_batch:
        return np.array([])
    return torch.cat(per_batch).numpy()

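### `noise_multiplier`

It controls how much random (Gaussian) noise is added to the clipped gradients, relative to `max_grad_norm`. The noise hides the contribution of any individual sample.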

- Higher noise: stronger privacy, lower accuracy.
- Lower noise: weaker privacy, higher accuracy.

### `max_grad_norm`

It limits how much influence any single data point can have on the model update.

Without clipping:

- A single outlier sample can dominate the gradient.
- This leaks information about that sample.

With clipping:

- Every sample’s gradient contribution is capped at `max_grad_norm`.
- Sensitivity becomes bounded, which means a calibrated amount of noise can enforce a privacy guarantee.

In [None]:
from opacus import PrivacyEngine

def train_with_dp(model, train_loader, noise_multiplier=1.5, max_grad_norm=1.0,
                  epochs=5, lr=1e-3):
    """Train `model` with DP-SGD through Opacus and return it with its privacy engine.

    Args:
        model: Module to train. It is handed to `make_private` as-is (no copy is
            made here), so pass a freshly initialised model if a separately
            trained baseline must stay untouched.
        train_loader: DataLoader over the training set.
        noise_multiplier: Noise level forwarded to `PrivacyEngine.make_private`.
        max_grad_norm: Per-sample gradient clipping bound forwarded to
            `PrivacyEngine.make_private`.
        epochs: Number of passes over the private data loader.
        lr: Learning rate of the Adam optimizer.

    Returns:
        Tuple `(private_model, privacy_engine)`; query the engine with
        `get_epsilon(delta=...)` for the privacy budget spent.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    privacy_engine = PrivacyEngine()

    # make_private returns the model, optimizer and loader to use for DP training.
    model, optimizer, dp_loader = privacy_engine.make_private(
        module=model,
        optimizer=optimizer,
        data_loader=train_loader,
        noise_multiplier=noise_multiplier,
        max_grad_norm=max_grad_norm,
    )

    train_model(model, dp_loader, optimizer, epochs=epochs)
    return model, privacy_engine

In [None]:
# Train the DP model from a fresh initialisation. Passing `baseline_model` would hand
# the already-trained network to Opacus, which wraps that same module: the baseline
# would then be modified by DP training as well, and the reported ε would not account
# for the earlier non-private epochs.
dp_model, privacy_engine = train_with_dp(SimpleMLP().to(device), train_loader)

  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.backward()
  loss.bac

In [None]:
# Ask the privacy engine for the ε spent by DP training at δ = 1e-5.
epsilon = privacy_engine.get_epsilon(delta=1e-5)
print(f"ε = {epsilon:.2f}")

ε = 0.28


In [None]:
# True-label confidences on members (train) and non-members (test), per model.
train_conf_baseline, test_conf_baseline = (
    collect_confidence(baseline_model, train_loader),
    collect_confidence(baseline_model, test_loader),
)

train_conf_dp, test_conf_dp = (
    collect_confidence(dp_model, train_loader),
    collect_confidence(dp_model, test_loader),
)

In [None]:
# Balance the attack set: the same number of members (train) and non-members (test).
n = min(
    len(train_conf_baseline),
    len(test_conf_baseline),
    len(train_conf_dp),
    len(test_conf_dp)
)

# Seeded generator so the subsample, and therefore the metrics below, are the
# same on every run of this cell.
rng = np.random.default_rng(0)
train_conf_baseline = rng.choice(train_conf_baseline, n, replace=False)
test_conf_baseline  = rng.choice(test_conf_baseline, n, replace=False)

# Threshold attack: predict "member" when the confidence exceeds the median
# member confidence.
# NOTE: because the threshold is the median of the member scores, about half of
# the members exceed it, so recall stays near 0.5 whatever the model is.
# Accuracy and precision are the informative numbers here.
threshold_baseline = np.median(train_conf_baseline)

# Ground truth: 1 for the n members, 0 for the n non-members (same order as below).
y_true = np.concatenate([np.ones(n), np.zeros(n)])

y_pred_baseline = np.concatenate([
    train_conf_baseline > threshold_baseline,
    test_conf_baseline > threshold_baseline
])

print("Baseline MIA Accuracy:", accuracy_score(y_true, y_pred_baseline))
print("Baseline Precision:", precision_score(y_true, y_pred_baseline))
print("Baseline Recall:", recall_score(y_true, y_pred_baseline))

Baseline MIA Accuracy: 0.49175
Baseline Precision: 0.49188391539596654
Baseline Recall: 0.5


In [None]:
# Mean member confidence, mean non-member confidence, and the attack threshold (baseline model).
np.average(train_conf_baseline), np.average(test_conf_baseline), threshold_baseline

(np.float32(0.96991485), np.float32(0.96136916), np.float32(0.9989272))

In [None]:
# Same threshold attack against the DP model. `n` and `y_true` come from the
# baseline attack cell.
# Seeded generator so the subsample, and therefore the metrics below, are the
# same on every run of this cell.
rng = np.random.default_rng(0)
train_conf_dp = rng.choice(train_conf_dp, n, replace=False)
test_conf_dp  = rng.choice(test_conf_dp, n, replace=False)

# NOTE: the threshold is the median of the member scores, so recall stays near
# 0.5 by construction; compare accuracy and precision with the baseline instead.
threshold_dp = np.median(train_conf_dp)

y_pred_dp = np.concatenate([
    train_conf_dp > threshold_dp,
    test_conf_dp > threshold_dp
])

print("DP MIA Accuracy:", accuracy_score(y_true, y_pred_dp))
print("DP Precision:", precision_score(y_true, y_pred_dp))
print("DP Recall:", recall_score(y_true, y_pred_dp))

DP MIA Accuracy: 0.4956
DP Precision: 0.4956383822363204
DP Recall: 0.5


In [None]:
# Mean member confidence, mean non-member confidence, and the attack threshold (DP model).
np.average(train_conf_dp), np.average(test_conf_dp), threshold_dp

(np.float32(0.8471842), np.float32(0.8537957), np.float32(0.98942876))

In [None]:
# Alternative datasets to try in place of MNIST.
# Every assignment below rebinds `train_dataset`, so running this cell as-is
# downloads all four datasets and leaves only the last one (KMNIST) bound.
# Nothing here rebuilds `train_loader` or loads a matching test split.
# NOTE(review): presumably meant as copy-paste snippets, one at a time — confirm.
train_dataset = datasets.FashionMNIST(
    root=".", train=True, download=True, transform=transform
)

# NOTE(review): the EMNIST "balanced" split presumably has more than 10 classes —
# check the class count against the classifier's output size before training.
train_dataset = datasets.EMNIST(
    root=".", split="balanced", train=True, download=True, transform=transform
)

# Same tensor-only transform as the one defined for MNIST, restated here.
transform = transforms.Compose([
    transforms.ToTensor(),
])

# NOTE(review): CIFAR-10 samples are natural colour images, presumably not
# 28x28 — a model whose first layer expects 28*28 inputs will not accept them.
train_dataset = datasets.CIFAR10(
    root=".", train=True, download=True, transform=transform
)

train_dataset = datasets.KMNIST(
    root=".", train=True, download=True, transform=transform
)