In [None]:
pip install traker

In [2]:
from trak import TRAKer

def get_trak_matrix(
    train_dl, val_dl, model, ckpts, train_set_size, val_set_size, **kwargs
):
    if kwargs is None or kwargs.get("task") is None:
        task = "image_classification"
    else:
        task = kwargs.pop("task")

    traker = TRAKer(model=model, task=task, train_set_size=train_set_size, **kwargs)

    for model_id, checkpoint in enumerate(ckpts):
        traker.load_checkpoint(checkpoint, model_id=model_id)
        for batch in train_dl:
            batch = [x.cuda() for x in batch]
            # batch should be a tuple/list of inputs and labels
            traker.featurize(batch=batch, num_samples=batch[0].shape[0])

    traker.finalize_features()

    for model_id, checkpoint in enumerate(ckpts):
        traker.start_scoring_checkpoint(
            exp_name="test",
            checkpoint=checkpoint,
            model_id=model_id,
            num_targets=val_set_size,
        )
    for batch in val_dl:
        batch = [x.cuda() for x in batch]
        traker.score(batch=batch, num_samples=batch[0].shape[0])

    scores = traker.finalize_scores(exp_name="test")
    return scores


In [3]:
import torch
import numpy as np
from torch.nn import functional as F

class DDA:
    """
    Debiasing through Data Attribution
    """

    def __init__(
        self,
        model,
        checkpoints,
        train_dataloader,
        val_dataloader,
        group_indices,
        train_set_size=None,
        val_set_size=None,
        trak_scores=None,
        trak_kwargs=None,
        device="cuda",
    ) -> None:
        """
        Args:
            model:
                The model to be debiased.
            checkpoints:
                A list of model checkpoints (state dictionaries) for debiasing
                (used to compute TRAK scores).
            train_dataloader:
                DataLoader for the training dataset.
            val_dataloader:
                DataLoader for the validation dataset.
            group_indices:
                A list indicating the group each sample in the validation
                dataset belongs to.
            train_set_size (optional):
                The size of the training dataset. Required if the dataloader
                does not have a dataset attribute.
            val_set_size (optional):
                The size of the validation dataset. Required if the dataloader
                does not have a dataset attribute.
            trak_scores (optional):
                Precomputed TRAK scores. If not provided, they will be computed
                from scratch.
            trak_kwargs (optional):
                Additional keyword arguments to be passed to
                `attrib.get_trak_matrix`.
            device (optional):
                pytorch device
        """
        self.model = model
        self.checkpoints = checkpoints
        self.dataloaders = {"train": train_dataloader, "val": val_dataloader}
        self.group_indices = group_indices
        self.device = device

        if trak_scores is not None:
            self.trak_scores = trak_scores
        else:
            try:
                self.train_set_size = len(train_dataloader.dataset)
                self.val_set_size = len(val_dataloader.dataset)
            except AttributeError as e:
                print(
                    f"No dataset attribute found in train_dataloader or val_dataloader. {e}"
                )
                if train_set_size is None or val_set_size is None:
                    raise ValueError(
                        "train_set_size and val_set_size must be specified if "
                        "train_dataloader and val_dataloader do not have a "
                        "dataset attribute."
                    ) from e
                self.train_set_size = train_set_size
                self.val_set_size = val_set_size

            # Step 1: compute TRAK scores
            if trak_kwargs is not None:
                trak_scores = get_trak_matrix(
                    train_dl=self.dataloaders["train"],
                    val_dl=self.dataloaders["val"],
                    model=self.model,
                    ckpts=self.checkpoints,
                    train_set_size=self.train_set_size,
                    val_set_size=self.val_set_size,
                    **trak_kwargs,
                )
            else:
                trak_scores = get_trak_matrix(
                    train_dl=self.dataloaders["train"],
                    val_dl=self.dataloaders["val"],
                    model=self.model,
                    ckpts=self.checkpoints,
                    train_set_size=self.train_set_size,
                    val_set_size=self.val_set_size,
                )

            self.trak_scores = trak_scores

    def get_group_losses(self, model, val_dl, group_indices) -> list:
        """Returns a list of losses for each group in the validation set."""
        losses = []
        model.eval()
        with torch.no_grad():
            for inputs, labels in val_dl:
                outputs = model(inputs.to(self.device))
                loss = F.cross_entropy(
                    outputs, labels.to(self.device), reduction="none"
                )
                losses.append(loss)
        losses = torch.cat(losses)

        n_groups = len(set(group_indices))
        group_losses = [losses[group_indices == i].mean() for i in range(n_groups)]
        return group_losses

    def compute_group_alignment_scores(self, trak_scores, group_indices, group_losses):
        """
        Computes group alignment scores (check Section 3.2 in our paper for
        details).

        Args:
            trak_scores:
                result of get_trak_matrix
            group_indices:
                a list of the form [group_index(x) for x in train_dataset]

        Returns:
            a list of group alignment scores for each training example
        """
        n_groups = len(set(group_indices))
        S = np.array(trak_scores)
        g = [
            group_losses[i].cpu().numpy() * S[:, np.array(group_indices) == i].mean(axis=1)
            for i in range(n_groups)
        ]
        g = np.stack(g)
        group_alignment_scores = g.mean(axis=0)
        return group_alignment_scores

    def get_debiased_train_indices(
        self, group_alignment_scores, use_heuristic=True, num_to_discard=None
    ):
        """
        If use_heuristic is True, training examples with negative score will be discarded,
        and the parameter num_to_discard will be ignored
        Otherwise, the num_to_discard training examples with lowest scores will be discarded.
        """
        if use_heuristic:
            return [i for i, score in enumerate(group_alignment_scores) if score >= 0]

        if num_to_discard is None:
            raise ValueError("num_to_discard must be specified if not using heuristic.")

        sorted_indices = sorted(
            range(len(group_alignment_scores)),
            key=lambda i: group_alignment_scores[i],
        )
        return sorted_indices[num_to_discard:]

    def debias(self, use_heuristic=True, num_to_discard=None):
        """
        Debiases the training process by constructing a new training set that
        excludes examples which harm worst-group accuracy.

        Args:
            use_heuristic:
                If True, examples with negative group alignment scores are
                discarded.  If False, the `num_to_discard` examples with the
                lowest scores are discarded.
            num_to_discard:
                The number of training examples to discard based on their group
                alignment scores.  This parameter is ignored if `use_heuristic`
                is True.

        Returns:
            debiased_train_inds (list):
                A list of indices for the training examples that should be
                included in the debiased training set.
        """

        # Step 2 (Step 1 is to compute TRAK scores):
        # compute group alignment scores
        group_losses = self.get_group_losses(
            model=self.model,
            val_dl=self.dataloaders["val"],
            group_indices=self.group_indices,
        )

        group_alignment_scores = self.compute_group_alignment_scores(
            self.trak_scores, self.group_indices, group_losses
        )

        # Step 3:
        # construct new training set
        debiased_train_inds = self.get_debiased_train_indices(
            group_alignment_scores,
            use_heuristic=use_heuristic,
            num_to_discard=num_to_discard,
        )

        return debiased_train_inds


In [4]:
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# CelebA

In [6]:
import os
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from tqdm import tqdm
from PIL import Image
import pandas as pd

# Path to CelebA images and metadata
celeba_images_path = "/kaggle/input/celeba-dataset/img_align_celeba/img_align_celeba"
partition_file = "/kaggle/input/celeba-dataset/list_eval_partition.csv"
attributes_file = "/kaggle/input/celeba-dataset/list_attr_celeba.csv"

# Function to get DataLoader for CelebA
def get_dataloader(
        batch_size=128, num_workers=4, split="train", shuffle=False, augment=True
    ):
    """
    Get DataLoader for the CelebA dataset.
    """
    # Define transformations
    if augment:
        transforms_pipeline = transforms.Compose(
            [
                transforms.RandomHorizontalFlip(),
                transforms.CenterCrop(178),  # Crop central face region
                transforms.Resize(128),  # Resize to smaller dimensions
                transforms.ToTensor(),
                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),  # Normalize to [-1, 1]
            ]
        )
    else:
        transforms_pipeline = transforms.Compose(
            [
                transforms.CenterCrop(178),
                transforms.Resize(128),
                transforms.ToTensor(),
                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
            ]
        )

    # Determine dataset split
    dataset_split = "train" if split == "train" else "valid"

    # Load partition and attributes
    partitions = pd.read_csv(partition_file)
    attributes = pd.read_csv(attributes_file)

    # Ensure attributes are binary
    attributes.iloc[:, 1:] = attributes.iloc[:, 1:].applymap(lambda x: 1 if x == 1 else 0)

    # Select indices based on split
    if dataset_split == "train":
        selected_indices = partitions[partitions['partition'] == 0].index
    else:
        selected_indices = partitions[partitions['partition'] == 1].index

    # Custom Dataset class for CelebA
    class CelebADataset(torch.utils.data.Dataset):
        def __init__(self, indices, img_dir, attributes, transform=None):
            self.indices = indices
            self.img_dir = img_dir
            self.attributes = attributes
            self.transform = transform

        def __len__(self):
            return len(self.indices)

        def __getitem__(self, idx):
            img_index = self.indices[idx]
            img_name = self.attributes.iloc[img_index, 0]  # Image file name
            img_path = os.path.join(self.img_dir, img_name)

            # Load and preprocess the image
            image = Image.open(img_path).convert("RGB")
            if self.transform:
                image = self.transform(image)

            # Get class label (convert to long tensor)
            label = torch.tensor(self.attributes.iloc[img_index, 1:].values.astype('float32'))
            label = label.argmax().long()
            return image, label

    # Create Dataset and DataLoader
    dataset = CelebADataset(
        indices=selected_indices,
        img_dir=celeba_images_path,
        attributes=attributes,
        transform=transforms_pipeline
    )

    loader = DataLoader(
        dataset=dataset, shuffle=shuffle, batch_size=batch_size, num_workers=num_workers
    )

    return loader, dataset

# Load pre-trained model
from torchvision.models import resnet18, ResNet18_Weights
model_before_mitigating = resnet18(weights=ResNet18_Weights.DEFAULT).cuda()
model_before_mitigating.eval()  # Set model to evaluation mode

# Define loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model_before_mitigating.parameters(), lr=0.001, momentum=0.9)

# Get DataLoaders
train_loader, train_dataset = get_dataloader(batch_size=32, split="train")
val_loader, val_dataset = get_dataloader(batch_size=32, split="val", shuffle=False, augment=False)

# Training Loop
num_epochs = 4  # Adjust as needed
for epoch in range(num_epochs):
    print(f"\nEpoch {epoch+1}/{num_epochs}")
    model_before_mitigating.train()  # Set model to training mode
    epoch_loss = 0.0
    for i, (images, labels) in enumerate(tqdm(train_loader, desc="Training")):
        images = images.cuda()
        labels = labels.cuda()

        # Forward pass
        outputs = model_before_mitigating(images)
        loss = criterion(outputs, labels)
        epoch_loss += loss.item()

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    avg_loss = epoch_loss / len(train_loader)
    print(f"Training Loss: {avg_loss:.4f}")

    # Evaluation
    model_before_mitigating.eval()  # Set model to evaluation mode
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc="Validation"):
            images = images.cuda()
            labels = labels.cuda()
            outputs = model_before_mitigating(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f"Validation Accuracy: {accuracy:.2f}%")

# Final Output
print("Training and evaluation completed.")

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 84.3MB/s]
  attributes.iloc[:, 1:] = attributes.iloc[:, 1:].applymap(lambda x: 1 if x == 1 else 0)
  attributes.iloc[:, 1:] = attributes.iloc[:, 1:].applymap(lambda x: 1 if x == 1 else 0)



Epoch 1/4


Training: 100%|██████████| 5087/5087 [05:44<00:00, 14.78it/s]


Training Loss: 1.3825


Validation: 100%|██████████| 621/621 [00:37<00:00, 16.69it/s]


Validation Accuracy: 57.65%

Epoch 2/4


Training: 100%|██████████| 5087/5087 [03:21<00:00, 25.19it/s]


Training Loss: 1.1890


Validation: 100%|██████████| 621/621 [00:20<00:00, 30.21it/s]


Validation Accuracy: 58.43%

Epoch 3/4


Training: 100%|██████████| 5087/5087 [03:21<00:00, 25.21it/s]


Training Loss: 1.1290


Validation: 100%|██████████| 621/621 [00:20<00:00, 30.77it/s]


Validation Accuracy: 58.75%

Epoch 4/4


Training: 100%|██████████| 5087/5087 [03:21<00:00, 25.20it/s]


Training Loss: 1.0797


Validation: 100%|██████████| 621/621 [00:20<00:00, 30.53it/s]

Validation Accuracy: 58.57%
Training and evaluation completed.





In [7]:
import pandas as pd

# Load the CelebA attributes file
attributes_file = "/kaggle/input/celeba-dataset/list_attr_celeba.csv"
attributes = pd.read_csv(attributes_file)

attributes['Young'] = attributes['Young'].apply(lambda x: 1 if x == 1 else 0)
attributes['Male'] = attributes['Male'].apply(lambda x: 1 if x == 1 else 0)

def define_subgroups(row):
    if row['Young'] == 1 and row['Male'] == 1:
        return 'young-male'
    elif row['Young'] == 1 and row['Male'] == 0:
        return 'young-female'
    elif row['Young'] == 0 and row['Male'] == 1:
        return 'old-male'
    elif row['Young'] == 0 and row['Male'] == 0:
        return 'old-female'

attributes['subgroup'] = attributes.apply(define_subgroups, axis=1)

subgroup_mapping = {name: i for i, name in enumerate(attributes['subgroup'].unique())}
attributes['group_index'] = attributes['subgroup'].map(subgroup_mapping)

print("Subgroup Distribution:")
print(attributes['subgroup'].value_counts())

Subgroup Distribution:
subgroup
young-female    103287
young-male       53447
old-male         30987
old-female       14878
Name: count, dtype: int64


In [9]:
group_labels = attributes.loc[val_dataset.indices, 'group_index'].values
# group_inds = group_labels

In [10]:
from sklearn.metrics import accuracy_score
import numpy as np

def evaluate_worst_group_accuracy(model, val_loader, group_inds, device="cuda"):
    model.eval()  # Set model to evaluation mode
    group_preds = {i: [] for i in set(group_inds)}
    group_labels = {i: [] for i in set(group_inds)}

    with torch.no_grad():
        for batch_idx, (inputs, labels) in enumerate(tqdm(val_loader, desc="Evaluating WGA")):
            inputs = inputs.to(device)
            labels = labels.to(device)  # Remove `.argmax(dim=1)` since labels are not one-hot encoded

            # Predict using the model
            outputs = model(inputs)
            preds = torch.argmax(outputs, dim=1).cpu().numpy()

            # Assign predictions and labels to the respective group
            batch_start = batch_idx * val_loader.batch_size
            batch_end = batch_start + len(labels)
            batch_groups = group_inds[batch_start:batch_end]

            for i, group in enumerate(batch_groups):
                group_preds[group].append(preds[i])  # Add single prediction
                group_labels[group].append(labels.cpu().numpy()[i])  # Add single label

    group_accuracies = {}
    for group in group_preds.keys():
        if len(group_preds[group]) == 0 or len(group_labels[group]) == 0:
            # Skip groups with no samples
            group_accuracies[group] = 0.0
            continue

        # Convert lists to arrays
        preds = np.array(group_preds[group])
        truths = np.array(group_labels[group])
        group_accuracies[group] = accuracy_score(truths, preds)

    # Print all group accuracies
    for group, acc in group_accuracies.items():
        print(f"Group {group} Accuracy: {acc:.4f}")

    # Find the worst group accuracy
    worst_group_accuracy = min(group_accuracies.values())
    return worst_group_accuracy, group_accuracies

**Calculating Fairness Metrics for Young and Old Groups**

In [11]:
from sklearn.metrics import accuracy_score, confusion_matrix
import numpy as np

# Function to evaluate Group Accuracies
def evaluate_group_accuracies(model, val_loader, group_labels, device="cuda"):
    model.eval()
    group_preds = {g: [] for g in set(group_labels)}
    group_truths = {g: [] for g in set(group_labels)}

    with torch.no_grad():
        for i, (images, labels) in enumerate(tqdm(val_loader, desc="Evaluating Group Accuracies")):
            images = images.to(device)
            labels = labels.to(device)  # Remove `.argmax(dim=1)` here

            outputs = model(images)
            preds = torch.argmax(outputs, dim=1).cpu().numpy()

            # Assign predictions and truths to respective groups
            batch_start = i * val_loader.batch_size
            batch_end = batch_start + len(labels)
            batch_groups = group_labels[batch_start:batch_end]

            for j, group in enumerate(batch_groups):
                group_preds[group].append(preds[j])
                group_truths[group].append(labels.cpu().numpy()[j])

    group_accuracies = {}
    for group in group_preds:
        if len(group_preds[group]) == 0:
            group_accuracies[group] = 0.0
        else:
            preds = np.array(group_preds[group])
            truths = np.array(group_truths[group])
            group_accuracies[group] = accuracy_score(truths, preds)

    # Print group accuracies
    for group, acc in group_accuracies.items():
        print(f"Group {group} Accuracy: {acc:.4f}")
    
    return group_accuracies

# Function to evaluate Demographic Parity (DP)
def evaluate_demographic_parity(model, val_loader, group_labels, device="cuda"):
    model.eval()
    group_pprs = {g: [] for g in set(group_labels)}

    with torch.no_grad():
        for i, (images, _) in enumerate(tqdm(val_loader, desc="Evaluating Demographic Parity")):
            images = images.to(device)
            outputs = model(images)
            preds = torch.argmax(outputs, dim=1).cpu().numpy()

            batch_start = i * val_loader.batch_size
            batch_end = batch_start + len(preds)
            batch_groups = group_labels[batch_start:batch_end]

            for j, group in enumerate(batch_groups):
                group_pprs[group].append(preds[j])

    ppr_disparities = {}
    for group in group_pprs:
        group_positive_rate = np.mean(group_pprs[group])
        ppr_disparities[group] = group_positive_rate

    # Print group PPRs
    for group, ppr in ppr_disparities.items():
        print(f"Group {group} PPR: {ppr:.4f}")
    
    return ppr_disparities

# Function to evaluate Equal Opportunity (EO)
def evaluate_equal_opportunity(model, val_loader, group_labels, device="cuda"):
    model.eval()
    group_tprs = {g: [] for g in set(group_labels)}

    with torch.no_grad():
        for i, (images, labels) in enumerate(tqdm(val_loader, desc="Evaluating Equal Opportunity")):
            images = images.to(device)
            labels = labels.to(device)  # Remove `.argmax(dim=1)` here

            outputs = model(images)
            preds = torch.argmax(outputs, dim=1).cpu().numpy()

            batch_start = i * val_loader.batch_size
            batch_end = batch_start + len(labels)
            batch_groups = group_labels[batch_start:batch_end]

            for j, group in enumerate(batch_groups):
                tp = (preds[j] == 1 and labels[j].cpu().numpy() == 1)
                actual_positive = labels[j].cpu().numpy() == 1
                group_tprs[group].append(tp / (actual_positive + 1e-8))  # Avoid division by zero

    tpr_disparities = {}
    for group in group_tprs:
        tpr_disparities[group] = np.mean(group_tprs[group])

    # Print group TPRs
    for group, tpr in tpr_disparities.items():
        print(f"Group {group} TPR: {tpr:.4f}")
    
    return tpr_disparities

# Function to evaluate Equalized Odds (EOd)
def evaluate_equalized_odds(model, val_loader, group_labels, device="cuda"):
    model.eval()
    group_tprs = {g: [] for g in set(group_labels)}
    group_fprs = {g: [] for g in set(group_labels)}

    with torch.no_grad():
        for i, (images, labels) in enumerate(tqdm(val_loader, desc="Evaluating Equalized Odds")):
            images = images.to(device)
            labels = labels.to(device)  # Remove `.argmax(dim=1)` here

            outputs = model(images)
            preds = torch.argmax(outputs, dim=1).cpu().numpy()

            batch_start = i * val_loader.batch_size
            batch_end = batch_start + len(labels)
            batch_groups = group_labels[batch_start:batch_end]

            for j, group in enumerate(batch_groups):
                tp = (preds[j] == 1 and labels[j].cpu().numpy() == 1)
                fp = (preds[j] == 1 and labels[j].cpu().numpy() == 0)
                actual_positive = labels[j].cpu().numpy() == 1
                actual_negative = labels[j].cpu().numpy() == 0

                group_tprs[group].append(tp / (actual_positive + 1e-8))  # Avoid division by zero
                group_fprs[group].append(fp / (actual_negative + 1e-8))  # Avoid division by zero

    tpr_disparities = {}
    fpr_disparities = {}
    for group in group_tprs:
        tpr_disparities[group] = np.mean(group_tprs[group])
        fpr_disparities[group] = np.mean(group_fprs[group])

    # Print group TPRs and FPRs
    for group in group_tprs:
        print(f"Group {group} TPR: {tpr_disparities[group]:.4f}, FPR: {fpr_disparities[group]:.4f}")
    
    return tpr_disparities, fpr_disparities

In [12]:
wga, group_accuracies = evaluate_worst_group_accuracy(model_before_mitigating, val_loader, group_labels)
dp_rates = evaluate_demographic_parity(model_before_mitigating, val_loader, group_labels)
eo_tprs = evaluate_equal_opportunity(model_before_mitigating, val_loader, group_labels)
tpr_disparities, fpr_disparities = evaluate_equalized_odds(model_before_mitigating, val_loader, group_labels)

Evaluating WGA: 100%|██████████| 621/621 [00:20<00:00, 30.89it/s]


Group 0 Accuracy: 0.6536
Group 1 Accuracy: 0.5402
Group 2 Accuracy: 0.4719
Group 3 Accuracy: 0.5593


Evaluating Demographic Parity: 100%|██████████| 621/621 [00:18<00:00, 33.60it/s]


Group 0 PPR: 2.0323
Group 1 PPR: 2.7719
Group 2 PPR: 4.5824
Group 3 PPR: 2.8930


Evaluating Equal Opportunity: 100%|██████████| 621/621 [00:18<00:00, 34.14it/s]


Group 0 TPR: 0.3203
Group 1 TPR: 0.0146
Group 2 TPR: 0.0090
Group 3 TPR: 0.3253


Evaluating Equalized Odds: 100%|██████████| 621/621 [00:19<00:00, 32.65it/s]

Group 0 TPR: 0.3203, FPR: 0.0001
Group 1 TPR: 0.0146, FPR: 0.0033
Group 2 TPR: 0.0090, FPR: 0.0003
Group 3 TPR: 0.3253, FPR: 0.0000





# CIFAR-10 with ResNet18

In [None]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from tqdm import tqdm

def get_dataloader(
        batch_size=128, num_workers=5, split="train", shuffle=False, augment=True
    ):
        if augment:
            transforms = torchvision.transforms.Compose(
                [
                    torchvision.transforms.RandomHorizontalFlip(),
                    torchvision.transforms.RandomAffine(0),
                    torchvision.transforms.ToTensor(),
                    torchvision.transforms.Normalize(
                        (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.201)
                    ),
                ]
            )
        else:
            transforms = torchvision.transforms.Compose(
                [
                    torchvision.transforms.ToTensor(),
                    torchvision.transforms.Normalize(
                        (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.201)
                    ),
                ]
            )

        is_train = split == "train"
        dataset = torchvision.datasets.CIFAR10(
            root="/tmp/cifar/", download=True, train=is_train, transform=transforms
        )

        loader = torch.utils.data.DataLoader(
            dataset=dataset, shuffle=shuffle, batch_size=batch_size, num_workers=num_workers
        )

        return loader

# Load pre-trained model
model_before_mitigating = torchvision.models.resnet18(pretrained=True).cuda()
model_before_mitigating.eval()  # Set model to evaluation mode

# Define loss function and optimizer (if training)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model_before_mitigating.parameters(), lr=0.001, momentum=0.9)

# Get data loaders
train_loader = get_dataloader(batch_size=32, split="train")
val_loader = get_dataloader(batch_size=32, split="val", shuffle=False, augment=False)

# Training loop
num_epochs = 10  # Adjust as needed
for epoch in range(num_epochs):
    model_before_mitigating.train()  # Set model to training mode
    for i, (images, labels) in enumerate(tqdm(train_loader, desc=f"Epoch {epoch+1}")):
        images = images.cuda()
        labels = labels.cuda()

        # Forward pass
        outputs = model_before_mitigating(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    model_before_mitigating.eval()  # Set model to evaluation mode
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc="Validation"):
            images = images.cuda()
            labels = labels.cuda()
            outputs = model_before_mitigating(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f"Epoch {epoch+1}, Accuracy: {accuracy:.2f}%")
    
# model_before_mitigating = torchvision.models.resnet18(pretrained=True).cuda().eval()

# #  put some random init weights as a placeholder
# ckpts = [model_before_mitigating.state_dict()]

# train_loader = get_dataloader(batch_size=64)
# val_loader = get_dataloader(split="val", batch_size=64)

#  random group allocations as a placeholder
ckpts = [model_before_mitigating.state_dict()]
group_inds = [np.random.choice(10) for i in range(len(val_loader.dataset))]
# print(f'ckpts; {ckpts}')
# print(f'group indicis; {group_inds}')

# COMPAS with ResNet18

In [45]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load COMPAS dataset
compas_data_path = "/kaggle/input/compas-scores-two-years/compas-scores-two-years.csv"  # Replace with actual path
df = pd.read_csv(compas_data_path)

# Drop irrelevant columns
columns_to_drop = [
    "name", "compas_screening_date", "dob", "c_case_number",  # Personal identifiers
    "is_violent_recid", "is_recid", "c_charge_desc"           # Other non-numeric data
]
df = df.drop(columns=columns_to_drop, errors="ignore")  # Ignore missing columns

# Separate features and labels
labels = df["two_year_recid"]  # Target variable
features = df.drop(columns=["two_year_recid"], errors="ignore")  # Drop target column from features

# Filter numerical features without missing values
numerical_features = features.select_dtypes(include=[np.number]).dropna(axis=1)

# Encode categorical features (e.g., race, sex)
categorical_features = features.select_dtypes(include=["object", "category"])
categorical_features = pd.get_dummies(categorical_features, drop_first=True)

# Combine processed numerical and categorical features
features = pd.concat([numerical_features, categorical_features], axis=1)

# Normalize numerical features
scaler = StandardScaler()
features = scaler.fit_transform(features)

# Train-test split
X_train, X_val, y_train, y_val = train_test_split(features, labels, test_size=0.2, random_state=42)

# Define COMPAS Dataset Class
class COMPASDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        x = torch.tensor(self.data[idx], dtype=torch.float32)
        y = torch.tensor(self.labels[idx], dtype=torch.long)
        return x, y

# Define DataLoader Function
def get_compas_dataloader(batch_size=128, split="train", shuffle=False):
    """
    Get DataLoader for the COMPAS dataset.
    """
    if split == "train":
        dataset = COMPASDataset(X_train, y_train.values)
    else:
        dataset = COMPASDataset(X_val, y_val.values)

    # DataLoader
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
    return loader, len(dataset)

# Load simplified model (for tabular data)
class COMPASModel(nn.Module):
    def __init__(self, input_size):
        super(COMPASModel, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(input_size, 16),
            nn.ReLU(),
            nn.Linear(16, 2)  # Binary classification (2 classes: reoffended or not)
        )

    def forward(self, x):
        return self.fc(x)

# Define dataset and model
train_loader, train_size = get_compas_dataloader(batch_size=32, split="train", shuffle=True)
val_loader, val_size = get_compas_dataloader(batch_size=32, split="val", shuffle=False)

# Input size depends on processed feature count
input_size = next(iter(train_loader))[0].shape[1]
model_before_mitigating = COMPASModel(input_size).cuda()

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model_before_mitigating.parameters(), lr=0.01, momentum=0.9)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model_before_mitigating.train()
    epoch_loss = 0
    with tqdm(train_loader, desc=f"Epoch {epoch+1} Training", leave=False) as pbar:
        for inputs, labels in pbar:
            inputs = inputs.cuda()
            labels = labels.cuda()

            # Forward pass
            outputs = model_before_mitigating(inputs)
            loss = criterion(outputs, labels)
            epoch_loss += loss.item()

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Update progress bar
            pbar.set_description(f"Epoch {epoch+1} Loss: {epoch_loss / len(train_loader):.4f}")

    print(f"Epoch {epoch+1} Training Loss: {epoch_loss / len(train_loader):.4f}")

    # Validation
    model_before_mitigating.eval()
    correct = 0
    total = 0
    with torch.no_grad(), tqdm(val_loader, desc=f"Epoch {epoch+1} Validation", leave=False) as pbar:
        for inputs, labels in pbar:
            inputs = inputs.cuda()
            labels = labels.cuda()
            outputs = model_before_mitigating(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            accuracy = 100 * correct / total
            pbar.set_description(f"Validation Accuracy: {accuracy:.2f}%")

    print(f"Epoch {epoch+1} Validation Accuracy: {accuracy:.2f}%")

# Define group indices based on race
race_mapping = {"African-American": 0, "Caucasian": 1, "Other": 2}
group_inds = df["race"].map(race_mapping).fillna(2).values  # Default to 'Other' if missing
group_inds = group_inds[:len(X_val)]  # Align with validation dataset size

# Print message to confirm completion
print("Training and evaluation completed.")
print(f"Group Indices (Sample): {group_inds[:10]}")  # Print sample group indices

                                                                        

Epoch 1 Training Loss: 0.3058


                                                                             

Epoch 1 Validation Accuracy: 93.62%


                                                                        

Epoch 2 Training Loss: 0.0158


                                                                             

Epoch 2 Validation Accuracy: 94.04%


                                                                        

Epoch 3 Training Loss: 0.0042


                                                                             

Epoch 3 Validation Accuracy: 93.97%


                                                                        

Epoch 4 Training Loss: 0.0039


                                                                             

Epoch 4 Validation Accuracy: 94.04%


                                                                        

Epoch 5 Training Loss: 0.0038


                                                                             

Epoch 5 Validation Accuracy: 94.04%


                                                                        

Epoch 6 Training Loss: 0.0037


                                                                             

Epoch 6 Validation Accuracy: 93.90%


                                                                        

Epoch 7 Training Loss: 0.0037


                                                                             

Epoch 7 Validation Accuracy: 93.76%


                                                                        

Epoch 8 Training Loss: 0.0036


                                                                             

Epoch 8 Validation Accuracy: 93.62%


                                                                        

Epoch 9 Training Loss: 0.0036


                                                                             

Epoch 9 Validation Accuracy: 93.69%


                                                                         

Epoch 10 Training Loss: 0.0037


                                                                             

Epoch 10 Validation Accuracy: 93.69%
Training and evaluation completed.
Group Indices (Sample): [2. 0. 0. 0. 2. 2. 1. 2. 1. 1.]




In [33]:
print('YOYO')
ckpts = [model_before_mitigating.state_dict()]
dda = DDA(model_before_mitigating, ckpts, train_loader, val_loader, group_inds)

# debiased_inds = dda.debias()
# print(dda.trak_scores)

YOYO


Finalizing features for all model IDs..: 100%|██████████| 1/1 [00:00<00:00, 6384.02it/s]
Finalizing scores for all model IDs..: 100%|██████████| 1/1 [00:00<00:00, 40.46it/s]


In [35]:
# debiased_inds = dda.debias(use_heuristic=False, num_to_discard=400)
debiased_inds = dda.debias(use_heuristic=True)
len(debiased_inds)
# debiased_inds

2882

In [36]:
from sklearn.metrics import accuracy_score
import torch

def evaluate_overall_accuracy(model, val_loader, device="cuda"):
    model.eval()  # Set the model to evaluation mode
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            # Predict using the model
            outputs = model(inputs)
            preds = torch.argmax(outputs, dim=1).cpu().numpy()

            # Collect all predictions and labels
            all_preds.extend(preds)
            all_labels.extend(labels.cpu().numpy())

    # Compute overall accuracy
    accuracy = accuracy_score(all_labels, all_preds)
    return accuracy

# Example usage
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Evaluating Overall Accuracy for the first model...")
overall_accuracy = evaluate_overall_accuracy(model_before_mitigating, val_loader, device=device)
print(f"Overall Accuracy: {overall_accuracy:.4f}")

Evaluating Overall Accuracy for the first model...
Overall Accuracy: 0.9390


# Calculate WGA before intervention

In [37]:
# group_inds = [np.random.choice(10) for i in range(len(val_loader.dataset))]
# Check alignment of group_inds and dataset
assert len(group_inds) == len(val_loader.dataset), "Group indices length mismatch."
print(f"Group Distribution: {dict(zip(*np.unique(group_inds, return_counts=True)))}")

# Evaluate Worst Group Accuracy
print("Evaluating Worst Group Accuracy (WGA) for the first model...")
worst_group_acc, group_accuracies = evaluate_worst_group_accuracy(model_before_mitigating, val_loader, group_inds, device="cuda")

print(f"Group Accuracies: {group_accuracies}")
print(f"Worst Group Accuracy: {worst_group_acc:.4f}")
print(f"Difference between Worst and Best Group: {(max(group_accuracies.values()) - worst_group_acc):.4f}")

import copy

deep_copy_model = copy.deepcopy(model_before_mitigating)

Group Distribution: {0.0: 728, 1.0: 485, 2.0: 230}
Evaluating Worst Group Accuracy (WGA) for the first model...


Evaluating WGA: 100%|██████████| 46/46 [00:00<00:00, 278.22it/s]

Group 0.0 Accuracy: 0.9368
Group 1.0 Accuracy: 0.9443
Group 2.0 Accuracy: 0.9348
Group Accuracies: {0.0: 0.9368131868131868, 1.0: 0.9443298969072165, 2.0: 0.9347826086956522}
Worst Group Accuracy: 0.9348
Difference between Worst and Best Group: 0.0095





# Equal Opportunity

In [38]:
def calculate_tpr(labels, preds):
    tp = np.sum((preds == 1) & (labels == 1))
    fn = np.sum((preds == 0) & (labels == 1))
    return tp / (tp + fn) if (tp + fn) > 0 else 0.0

def evaluate_equal_opportunity(model, val_loader, group_inds, device="cuda"):
    model.eval()
    group_preds = {i: [] for i in set(group_inds)}
    group_labels = {i: [] for i in set(group_inds)}

    with torch.no_grad():
        for batch_idx, (inputs, labels) in enumerate(val_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            preds = torch.argmax(outputs, dim=1).cpu().numpy()

            batch_start = batch_idx * val_loader.batch_size
            batch_end = batch_start + len(labels)
            batch_groups = group_inds[batch_start:batch_end]

            for i, group in enumerate(batch_groups):
                group_preds[group].append(preds[i])
                group_labels[group].append(labels.cpu().numpy()[i])

    group_tprs = {}
    for group in group_preds.keys():
        preds = np.array(group_preds[group])
        labels = np.array(group_labels[group])
        group_tprs[group] = calculate_tpr(labels, preds)

    min_tpr = min(group_tprs.values())
    max_tpr = max(group_tprs.values())
    tpr_disparity = max_tpr - min_tpr

    return group_tprs, tpr_disparity

In [39]:
group_tprs, tpr_disparity = evaluate_equal_opportunity(deep_copy_model, val_loader, group_inds)
print(f"Group TPRs: {group_tprs}")
print(f"TPR Disparity: {tpr_disparity:.4f}")

Group TPRs: {0.0: 0.9013157894736842, 1.0: 0.8986175115207373, 2.0: 0.898989898989899}
TPR Disparity: 0.0027


# Equal Odds

In [40]:
def calculate_fpr(labels, preds):
    fp = np.sum((preds == 1) & (labels == 0))
    tn = np.sum((preds == 0) & (labels == 0))
    return fp / (fp + tn) if (fp + tn) > 0 else 0.0

def evaluate_equalized_odds(model, val_loader, group_inds, device="cuda"):
    model.eval()
    group_preds = {i: [] for i in set(group_inds)}
    group_labels = {i: [] for i in set(group_inds)}

    with torch.no_grad():
        for batch_idx, (inputs, labels) in enumerate(val_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            preds = torch.argmax(outputs, dim=1).cpu().numpy()

            batch_start = batch_idx * val_loader.batch_size
            batch_end = batch_start + len(labels)
            batch_groups = group_inds[batch_start:batch_end]

            for i, group in enumerate(batch_groups):
                group_preds[group].append(preds[i])
                group_labels[group].append(labels.cpu().numpy()[i])

    group_tprs, group_fprs = {}, {}
    for group in group_preds.keys():
        preds = np.array(group_preds[group])
        labels = np.array(group_labels[group])
        group_tprs[group] = calculate_tpr(labels, preds)
        group_fprs[group] = calculate_fpr(labels, preds)

    tpr_disparity = max(group_tprs.values()) - min(group_tprs.values())
    fpr_disparity = max(group_fprs.values()) - min(group_fprs.values())

    return group_tprs, group_fprs, tpr_disparity, fpr_disparity

In [41]:
group_tprs, group_fprs, tpr_disparity, fpr_disparity = evaluate_equalized_odds(deep_copy_model, val_loader, group_inds)
print(f"Group TPRs: {group_tprs}")
print(f"Group FPRs: {group_fprs}")
print(f"TPR Disparity: {tpr_disparity:.4f}")
print(f"FPR Disparity: {fpr_disparity:.4f}")

Group TPRs: {0.0: 0.9013157894736842, 1.0: 0.8986175115207373, 2.0: 0.898989898989899}
Group FPRs: {0.0: 0.03773584905660377, 1.0: 0.018656716417910446, 2.0: 0.03816793893129771}
TPR Disparity: 0.0027
FPR Disparity: 0.0195


# Demographic Parity

In [42]:
def calculate_ppr(preds):
    return np.mean(preds)


def evaluate_demographic_parity(model, val_loader, group_inds, device="cuda"):
    """
    Evaluate Demographic Parity.
    Ensures PPR is correctly normalized as probabilities.
    """
    model.eval()
    group_preds = {i: [] for i in set(group_inds)}

    with torch.no_grad():
        for batch_idx, (inputs, labels) in enumerate(val_loader):
            inputs = inputs.to(device)
            outputs = model(inputs)
            preds = torch.argmax(outputs, dim=1).cpu().numpy()

            # Get the group indices for the current batch
            batch_start = batch_idx * val_loader.batch_size
            batch_end = batch_start + len(labels)
            batch_groups = group_inds[batch_start:batch_end]

            # Assign predictions to the corresponding group
            for i, group in enumerate(batch_groups):
                group_preds[group].append(preds[i])

    group_pprs = {}
    for group in group_preds.keys():
        # Flatten the predictions list for each group and normalize
        preds = np.array(group_preds[group]).flatten()
        positive_preds = (preds == 1).sum()  # Count positive predictions
        total_preds = len(preds)  # Total number of predictions
        group_pprs[group] = positive_preds / total_preds if total_preds > 0 else 0.0

    # Calculate the disparity in PPRs across groups
    min_ppr = min(group_pprs.values())
    max_ppr = max(group_pprs.values())
    ppr_disparity = max_ppr - min_ppr

    return group_pprs, ppr_disparity

In [43]:
group_pprs, ppr_disparity = evaluate_demographic_parity(deep_copy_model, val_loader, group_inds)
print(f"Group PPRs: {group_pprs}")
print(f"PPR Disparity: {ppr_disparity:.4f}")

Group PPRs: {0.0: 0.3983516483516483, 1.0: 0.41237113402061853, 2.0: 0.40869565217391307}
PPR Disparity: 0.0140


# False Negative for COMPAS dataset

In [44]:
from sklearn.metrics import confusion_matrix
import numpy as np
from tqdm import tqdm

def calculate_fnr_fpr(model, val_loader, group_inds, device="cuda"):
    """
    Calculate False Negative Rate (FNR) and False Positive Rate (FPR) for each group.
    """
    model.eval()  # Set model to evaluation mode
    group_metrics = {g: {"FN": 0, "FP": 0, "TP": 0, "TN": 0} for g in set(group_inds)}  # Metrics for each group

    with torch.no_grad():
        for batch_idx, (inputs, labels) in enumerate(tqdm(val_loader, desc="Calculating FNR and FPR")):
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            preds = torch.argmax(outputs, dim=1).cpu().numpy()
            labels = labels.cpu().numpy()

            # Get the groups for the current batch
            batch_start = batch_idx * val_loader.batch_size
            batch_end = batch_start + len(labels)
            batch_groups = group_inds[batch_start:batch_end]

            for i, group in enumerate(batch_groups):
                if group not in group_metrics:
                    continue  # Skip if group is not defined
                
                # Update confusion matrix components
                if labels[i] == 1 and preds[i] == 0:  # False Negative
                    group_metrics[group]["FN"] += 1
                elif labels[i] == 0 and preds[i] == 1:  # False Positive
                    group_metrics[group]["FP"] += 1
                elif labels[i] == 1 and preds[i] == 1:  # True Positive
                    group_metrics[group]["TP"] += 1
                elif labels[i] == 0 and preds[i] == 0:  # True Negative
                    group_metrics[group]["TN"] += 1

    # Calculate FNR and FPR for each group
    group_fnr_fpr = {}
    for group, metrics in group_metrics.items():
        fn = metrics["FN"]
        fp = metrics["FP"]
        tp = metrics["TP"]
        tn = metrics["TN"]

        actual_positives = tp + fn
        actual_negatives = tn + fp

        fnr = fn / (actual_positives + 1e-8) if actual_positives > 0 else 0.0
        fpr = fp / (actual_negatives + 1e-8) if actual_negatives > 0 else 0.0

        group_fnr_fpr[group] = {"FNR": fnr, "FPR": fpr}

    # Print FNR and FPR for each group
    print("\nGroup FNR and FPR:")
    for group, metrics in group_fnr_fpr.items():
        print(f"Group {group}: FNR = {metrics['FNR']:.4f}, FPR = {metrics['FPR']:.4f}")

    # Calculate and print disparities
    fnr_values = [metrics["FNR"] for metrics in group_fnr_fpr.values()]
    fpr_values = [metrics["FPR"] for metrics in group_fnr_fpr.values()]
    fnr_disparity = max(fnr_values) - min(fnr_values)
    fpr_disparity = max(fpr_values) - min(fpr_values)

    print(f"\nFNR Disparity (Max - Min): {fnr_disparity:.4f}")
    print(f"FPR Disparity (Max - Min): {fpr_disparity:.4f}")

    return group_fnr_fpr, fnr_disparity, fpr_disparity


# Example usage
group_fnr_fpr, fnr_disparity, fpr_disparity = calculate_fnr_fpr(model_before_mitigating, val_loader, group_inds)

Calculating FNR and FPR: 100%|██████████| 46/46 [00:00<00:00, 323.07it/s]


Group FNR and FPR:
Group 0.0: FNR = 0.0987, FPR = 0.0377
Group 1.0: FNR = 0.1014, FPR = 0.0187
Group 2.0: FNR = 0.1010, FPR = 0.0382

FNR Disparity (Max - Min): 0.0027
FPR Disparity (Max - Min): 0.0195





# Machine Unlearning

In [22]:
harmful_indices = debiased_inds

def remove_influence(model, dataloader, harmful_indices, factor, device="cuda"):
    model.eval()
    harmful_dataset = torch.utils.data.Subset(dataloader.dataset, harmful_indices)
    harmful_loader = torch.utils.data.DataLoader(harmful_dataset, batch_size=1)

    for inputs, labels in harmful_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        outputs = model(inputs)

        loss = torch.nn.functional.cross_entropy(outputs, labels)
        grads = torch.autograd.grad(loss, model.parameters(), retain_graph=True)

        with torch.no_grad():
            for param, grad in zip(model.parameters(), grads):
                param -= grad * factor

    return model

results ={'factor':[], 'model':[], 'min':[], 'max':[], 'gap':[]}
factors = np.linspace(0.0001, 0.01, 20)

for factor in factors:
    newdeepmodel = copy.deepcopy(deep_copy_model)
    m = remove_influence(newdeepmodel, train_loader, harmful_indices, factor, device="cuda")
    wga, group_accs = evaluate_worst_group_accuracy(m, val_loader, group_inds, device="cuda")
    current_gap = (max(group_accs.values()) - wga)
    results['model'].append(m)
    results['min'].append(wga)
    results['max'].append(max(group_accs.values()))
    results['gap'].append(current_gap)
    results['factor'].append(factor)   

Evaluating WGA: 100%|██████████| 46/46 [00:00<00:00, 697.86it/s]


Group 0.0 Accuracy: 0.7047
Group 1.0 Accuracy: 0.6680
Group 2.0 Accuracy: 0.6957


Evaluating WGA: 100%|██████████| 46/46 [00:00<00:00, 699.77it/s]


Group 0.0 Accuracy: 0.7033
Group 1.0 Accuracy: 0.6742
Group 2.0 Accuracy: 0.6913


Evaluating WGA: 100%|██████████| 46/46 [00:00<00:00, 694.28it/s]


Group 0.0 Accuracy: 0.7005
Group 1.0 Accuracy: 0.6784
Group 2.0 Accuracy: 0.6913


Evaluating WGA: 100%|██████████| 46/46 [00:00<00:00, 680.39it/s]


Group 0.0 Accuracy: 0.6964
Group 1.0 Accuracy: 0.6784
Group 2.0 Accuracy: 0.6870


Evaluating WGA: 100%|██████████| 46/46 [00:00<00:00, 695.17it/s]


Group 0.0 Accuracy: 0.7019
Group 1.0 Accuracy: 0.6742
Group 2.0 Accuracy: 0.6826


Evaluating WGA: 100%|██████████| 46/46 [00:00<00:00, 703.05it/s]


Group 0.0 Accuracy: 0.7033
Group 1.0 Accuracy: 0.6742
Group 2.0 Accuracy: 0.6826


Evaluating WGA: 100%|██████████| 46/46 [00:00<00:00, 694.96it/s]


Group 0.0 Accuracy: 0.7033
Group 1.0 Accuracy: 0.6722
Group 2.0 Accuracy: 0.6783


Evaluating WGA: 100%|██████████| 46/46 [00:00<00:00, 681.40it/s]


Group 0.0 Accuracy: 0.6992
Group 1.0 Accuracy: 0.6680
Group 2.0 Accuracy: 0.6826


Evaluating WGA: 100%|██████████| 46/46 [00:00<00:00, 681.28it/s]


Group 0.0 Accuracy: 0.6978
Group 1.0 Accuracy: 0.6742
Group 2.0 Accuracy: 0.6783


Evaluating WGA: 100%|██████████| 46/46 [00:00<00:00, 681.23it/s]


Group 0.0 Accuracy: 0.7019
Group 1.0 Accuracy: 0.6680
Group 2.0 Accuracy: 0.6739


Evaluating WGA: 100%|██████████| 46/46 [00:00<00:00, 687.80it/s]


Group 0.0 Accuracy: 0.7033
Group 1.0 Accuracy: 0.6660
Group 2.0 Accuracy: 0.6696


Evaluating WGA: 100%|██████████| 46/46 [00:00<00:00, 691.63it/s]


Group 0.0 Accuracy: 0.7019
Group 1.0 Accuracy: 0.6619
Group 2.0 Accuracy: 0.6609


Evaluating WGA: 100%|██████████| 46/46 [00:00<00:00, 696.80it/s]


Group 0.0 Accuracy: 0.6992
Group 1.0 Accuracy: 0.6680
Group 2.0 Accuracy: 0.6652


Evaluating WGA: 100%|██████████| 46/46 [00:00<00:00, 701.83it/s]


Group 0.0 Accuracy: 0.6964
Group 1.0 Accuracy: 0.6557
Group 2.0 Accuracy: 0.6652


Evaluating WGA: 100%|██████████| 46/46 [00:00<00:00, 673.99it/s]


Group 0.0 Accuracy: 0.6992
Group 1.0 Accuracy: 0.6557
Group 2.0 Accuracy: 0.6609


Evaluating WGA: 100%|██████████| 46/46 [00:00<00:00, 698.63it/s]


Group 0.0 Accuracy: 0.6964
Group 1.0 Accuracy: 0.6557
Group 2.0 Accuracy: 0.6609


Evaluating WGA: 100%|██████████| 46/46 [00:00<00:00, 703.90it/s]


Group 0.0 Accuracy: 0.6964
Group 1.0 Accuracy: 0.6536
Group 2.0 Accuracy: 0.6609


Evaluating WGA: 100%|██████████| 46/46 [00:00<00:00, 701.98it/s]


Group 0.0 Accuracy: 0.6937
Group 1.0 Accuracy: 0.6495
Group 2.0 Accuracy: 0.6565


Evaluating WGA: 100%|██████████| 46/46 [00:00<00:00, 705.74it/s]


Group 0.0 Accuracy: 0.6978
Group 1.0 Accuracy: 0.6495
Group 2.0 Accuracy: 0.6565


Evaluating WGA: 100%|██████████| 46/46 [00:00<00:00, 706.27it/s]

Group 0.0 Accuracy: 0.7005
Group 1.0 Accuracy: 0.6598
Group 2.0 Accuracy: 0.6609





In [23]:
import pandas as pd

df = pd.DataFrame(results).sort_values('factor')
df

Unnamed: 0,factor,model,min,max,gap
0,0.0001,COMPASModel(\n (fc): Sequential(\n (0): Li...,0.668041,0.70467,0.036629
1,0.000621,COMPASModel(\n (fc): Sequential(\n (0): Li...,0.674227,0.703297,0.02907
2,0.001142,COMPASModel(\n (fc): Sequential(\n (0): Li...,0.678351,0.700549,0.022199
3,0.001663,COMPASModel(\n (fc): Sequential(\n (0): Li...,0.678351,0.696429,0.018078
4,0.002184,COMPASModel(\n (fc): Sequential(\n (0): Li...,0.674227,0.701923,0.027696
5,0.002705,COMPASModel(\n (fc): Sequential(\n (0): Li...,0.674227,0.703297,0.02907
6,0.003226,COMPASModel(\n (fc): Sequential(\n (0): Li...,0.672165,0.703297,0.031132
7,0.003747,COMPASModel(\n (fc): Sequential(\n (0): Li...,0.668041,0.699176,0.031135
8,0.004268,COMPASModel(\n (fc): Sequential(\n (0): Li...,0.674227,0.697802,0.023575
9,0.004789,COMPASModel(\n (fc): Sequential(\n (0): Li...,0.668041,0.701923,0.033882


Now, it's time to investigate what are the best approaches to machine unlearning and how can we formulate that.

What are the other approaches to machine unlearning?

Other Fairness notations might come in handy!