In [10]:
import torch
from torch.utils.data import Dataset, DataLoader

In [11]:
class AIF360TorchDataset(Dataset):
    """
    Map-style PyTorch Dataset view over an AIF360 BinaryLabelDataset (or similar).

    The source arrays are read once at construction time and stored as tensors:
      - features:          torch.float32 tensor of shape (n_samples, n_features)
      - labels:            torch.long tensor of shape (n_samples,)
      - protected_attrs:   torch.float32 tensor of shape (n_samples, n_protected_attrs),
                           or None when include_protected is False

    Indexing returns a dict with the keys 'features' and 'label', plus
    'protected_attributes' when protected attributes were requested.
    """

    def __init__(self, aif360_dataset, include_protected=True, transform=None):
        """
        Args:
            aif360_dataset:      an AIF360 dataset object (e.g. AdultDataset(), COMPASDataset(), etc.)
                                 exposing `features`, `labels` and, if include_protected is True,
                                 `protected_attributes` as arrays
            include_protected:   if True, will also expose protected_attributes
            transform:           optional callable applied to each feature vector on access;
                                 None (the default) disables it
        """
        # raw numpy arrays from AIF360; labels are flattened to one entry per sample
        X = aif360_dataset.features
        y = aif360_dataset.labels.ravel()

        # protected_attributes is only read from the source when it was requested
        if include_protected:
            prot = aif360_dataset.protected_attributes
            self.protected_attrs = torch.tensor(prot, dtype=torch.float32)
        else:
            self.protected_attrs = None

        # convert to torch tensors (labels are cast to integer class indices)
        self.features = torch.tensor(X, dtype=torch.float32)
        self.labels = torch.tensor(y, dtype=torch.long)
        self.transform = transform

    def __len__(self):
        """Return the number of samples (rows of the feature tensor)."""
        return self.features.shape[0]

    def __getitem__(self, idx):
        """
        Return the sample at `idx` as a dict.

        Keys:
            'features':              feature vector, after `transform` if one was given
            'label':                 the matching entry of `labels`
            'protected_attributes':  only present when include_protected was True
        """
        x = self.features[idx]
        # Test against None instead of truthiness: a callable that defines
        # __len__ or __bool__ can be falsy and would otherwise be skipped silently.
        if self.transform is not None:
            x = self.transform(x)
        sample = {
            'features': x,
            'label': self.labels[idx],
        }
        if self.protected_attrs is not None:
            sample['protected_attributes'] = self.protected_attrs[idx]
        return sample

In [12]:
from aif360.datasets import AdultDataset

# Print the class documentation for AdultDataset (constructor signature and its
# default arguments) as a reference for the custom arguments used further down.
help(AdultDataset)

Help on class AdultDataset in module aif360.datasets.adult_dataset:

class AdultDataset(aif360.datasets.standard_dataset.StandardDataset)
 |  AdultDataset(label_name='income-per-year', favorable_classes=['>50K', '>50K.'], protected_attribute_names=['race', 'sex'], privileged_classes=[['White'], ['Male']], instance_weights_name=None, categorical_features=['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'native-country'], features_to_keep=[], features_to_drop=['fnlwgt'], na_values=['?'], custom_preprocessing=None, metadata={'label_maps': [{1.0: '>50K', 0.0: '<=50K'}], 'protected_attribute_maps': [{1.0: 'White', 0.0: 'Non-white'}, {1.0: 'Male', 0.0: 'Female'}]})
 |  
 |  Adult Census Income Dataset.
 |  
 |  See :file:`aif360/data/raw/adult/README.md`.
 |  
 |  Method resolution order:
 |      AdultDataset
 |      aif360.datasets.standard_dataset.StandardDataset
 |      aif360.datasets.binary_label_dataset.BinaryLabelDataset
 |      aif360.datasets.structured_dat

In [15]:
# Step 1: build the AIF360 Adult dataset with 'sex' as the only protected
# attribute ('Male' privileged), no categorical columns, and two kept features
from aif360.datasets import AdultDataset
aif_data = AdultDataset(
    protected_attribute_names=['sex'],
    privileged_classes=[['Male']],
    categorical_features=[],
    features_to_keep=['age', 'education-num'],
)

# Step 2: expose it through the PyTorch wrapper, keeping the protected attributes
torch_ds = AIF360TorchDataset(aif_data, include_protected=True)

# Step 3: iterate over it in shuffled mini-batches of 64 samples
loader = DataLoader(torch_ds, batch_size=64, shuffle=True)

In [21]:
# Pull a single mini-batch from the loader; it is a dict keyed like the dataset samples.
batch = next(iter(loader))

features = batch['features']
label = batch['label']
protected_attrs = batch['protected_attributes']

# Report the shape of each batched tensor, one per line.
print(
    f"features: {features.shape}",
    f"label: {label.shape}",
    f"protected_attrs: {protected_attrs.shape}",
    sep="\n",
)

features: torch.Size([64, 3])
label: torch.Size([64])
protected_attrs: torch.Size([64, 1])
