<a href="https://colab.research.google.com/github/InesBi/fairness-and-privacy/blob/master/empirical_sensitivity.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Setup: clone the assignment repository and import the required modules

In [1]:
!git clone https://github.com/ecreager/ammi-fairness-and-privacy.git

Cloning into 'ammi-fairness-and-privacy'...
remote: Enumerating objects: 113, done.[K
remote: Counting objects: 100% (113/113), done.[K
remote: Compressing objects: 100% (88/88), done.[K
remote: Total 113 (delta 34), reused 90 (delta 19), pack-reused 0[K
Receiving objects: 100% (113/113), 27.11 MiB | 43.11 MiB/s, done.
Resolving deltas: 100% (34/34), done.


In [2]:
%cd /content/ammi-fairness-and-privacy/assignment2

/content/ammi-fairness-and-privacy/assignment2


In [0]:
"""Empirical Sensitivity."""
import argparse
import os

import numpy as np
import torch
from torch import nn

from utils import get_data_loaders
from logistic_regression import nonprivate_logistic_regression
from torchvision import transforms

Your code: plotting helpers and the empirical sensitivity computation

In [0]:
def plot_hist(array_of_empirical_sensitivities, n, lmbda, name):
    """Save a histogram of empirical sensitivities with the theoretical bound marked.

    The histogram is drawn on a log-scaled x-axis and written to
    ``./figs/<name>.histogram.png`` (the directory is created if needed).

    Args:
        array_of_empirical_sensitivities: np.ndarray of sensitivities, one per run.
        n: number of training examples; used in the theoretical bound.
        lmbda: regularization strength; used in the theoretical bound.
        name: str used in the plot title and in the output filename.

    Returns:
        The path of the saved figure.

    Raises:
        ValueError: if the array is not a np.ndarray, if name is not a str,
            or if n or lmbda is not strictly positive.
    """
    if not isinstance(array_of_empirical_sensitivities, np.ndarray):
        raise ValueError('array_of_empirical_sensitivities should be a np.ndarray.')
    if not isinstance(name, str):
        raise ValueError('name should be a str')
    # The bound below divides by n * lmbda and is drawn on a log axis, so both
    # quantities have to be strictly positive.
    if n <= 0 or lmbda <= 0:
        raise ValueError('n and lmbda should be positive')

    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    # L2 sensitivity bound for regularized logistic regression: 2 / (n * lambda).
    # NOTE(review): this assumes inputs with norm <= 1 and an objective of the
    # form mean(loss) + (lambda / 2) * ||w||^2 -- confirm against the
    # regularizer convention used in logistic_regression.py.
    max_theoretical_sensitivity = 2. / (n * lmbda)

    num_bins = 20
    dirname = './figs'
    filename = os.path.join(dirname, name) + '.histogram.png'
    os.makedirs(dirname, exist_ok=True)

    fig, ax = plt.subplots()
    ax.set_xscale('log')
    ax.hist(array_of_empirical_sensitivities,
            num_bins, label='empirical sensitivities')
    ax.set_title('histogram of sensitivities: ' + name)
    ax.set_xlabel('sensitivity')
    ax.set_ylabel('count')
    ax.axvline(x=max_theoretical_sensitivity, color='r', linestyle='dashed', linewidth=2,
            label='theoretical max sensitivity')
    ax.legend()
    fig.savefig(filename)
    # Release the figure: this function is called once per configuration and
    # pyplot otherwise keeps every figure alive.
    plt.close(fig)
    return filename


def plot_extreme_neighbors(sensitivities, list_of_neighboring_examples, name):
    """Plots to disk the neighboring-example pairs with the most and least empirical sensitivity

    Two figures are written under ``./figs`` (created if needed), each showing
    the two differing images of one run side by side:
    ``<name>.max_sensitivity.png`` and ``<name>.min_sensitivity.png``.

    Note on the data structures used:
        sensitivities: a np.ndarray containing empirical sensitivities for each run
        list_of_neighboring_examples: a list of neighboring example pairs, one for each run. in other words:

        list_of_neighboring_examples = [
            neighboring_example_1,
            neighboring_example_2,
            ...
            neighboring_example_n,
            ]

        where each tuple in the list represents the data diff between the neighboring
        datasets and is formatted like this:

        neighboring_example_i = (
            (neighbor_img_i, neighbor_label_i),
            (neighbor_img_i_prime, neighbor_label_i_prime),
        )

        See utils.py if you are still confused.

    Args:
        sensitivities: np.ndarray of empirical sensitivities, one per run.
        list_of_neighboring_examples: non-empty list as described above.
        name: str used in the figure titles and output filenames.

    Returns:
        A tuple ``(max_sensitivity_filename, min_sensitivity_filename)``.

    Raises:
        ValueError: if an argument has the wrong type, the list is empty, or
            the number of sensitivities differs from the number of pairs.
    """
    if not isinstance(sensitivities, np.ndarray):
        raise ValueError('sensitivies should be a np.ndarray.')
    # Validate the container before indexing into it, so that None or an empty
    # list is reported as a ValueError rather than a TypeError/IndexError.
    if not isinstance(list_of_neighboring_examples, list) or not list_of_neighboring_examples:
        raise ValueError('list_of_neighboring_examples should be a list of tuple pairs, where tuple contains img tensors')
    first_neighbor_pair = list_of_neighboring_examples[0]
    if not isinstance(first_neighbor_pair, tuple) \
            or not isinstance(first_neighbor_pair[0][0], torch.Tensor):
        raise ValueError('list_of_neighboring_examples should be a list of tuple pairs, where tuple contains img tensors')
    if not isinstance(name, str):
        raise ValueError('name should be a str')
    # argmax/argmin indices are used to index the list, so both must line up.
    if sensitivities.size != len(list_of_neighboring_examples):
        raise ValueError('sensitivities and list_of_neighboring_examples should have the same length')

    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    pil_transform = transforms.ToPILImage()
    dirname = "./figs"
    os.makedirs(dirname, exist_ok=True)

    def save_pair_figure(pair_index, title_prefix, suffix):
        """Draw the two images of one neighboring pair side by side and save them."""
        (img, _), (img_prime, _) = list_of_neighboring_examples[pair_index]
        filename = os.path.join(dirname, name) + suffix
        fig, axes = plt.subplots(1, 2, figsize=(9, 3))
        axes[0].imshow(pil_transform(img))
        axes[1].imshow(pil_transform(img_prime))
        fig.suptitle(title_prefix + name)
        fig.savefig(filename)
        # Close the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
        return filename

    filenames = (
        save_pair_figure(np.argmax(sensitivities), "maximum sensitivity:", ".max_sensitivity.png"),
        save_pair_figure(np.argmin(sensitivities), "minimum sensitivity:", ".min_sensitivity.png"),
    )
    return filenames


def compute_empricial_sensivity(train_loader, neighbor_loader,
        num_epochs, learning_rate, lmbda, model_seed=None):
    """Measure how far the learned weights move between two neighboring datasets.

    ``nonprivate_logistic_regression`` is run once on each loader (train
    first, then neighbor) with the same hyperparameters and the same
    ``model_seed``. The ``"weight"`` entries of the two results are compared.

    Args:
        train_loader: loader for the original dataset.
        neighbor_loader: loader for the neighboring dataset.
        num_epochs: forwarded unchanged to ``nonprivate_logistic_regression``.
        learning_rate: forwarded unchanged to ``nonprivate_logistic_regression``.
        lmbda: forwarded unchanged to ``nonprivate_logistic_regression``.
        model_seed: forwarded unchanged to ``nonprivate_logistic_regression``.

    Returns:
        The p=2 norm (via ``torch.norm``) of the difference between the two
        ``"weight"`` entries.
    """
    # Only the "weight" entry is compared; any other learned parameters the
    # training routine may return are not part of the measurement.
    learned_weights = [
        nonprivate_logistic_regression(loader, num_epochs, learning_rate, lmbda, model_seed)["weight"]
        for loader in (train_loader, neighbor_loader)
    ]
    weight_gap = learned_weights[0] - learned_weights[1]
    return torch.norm(weight_gap, p=2)

Main function: run the experiment over several data seeds and save the plots

In [0]:
def main(n, runs, epochs, lr, batch_size, model_seed, lmbda):
    """Run the empirical sensitivity experiment and save the resulting plots.

    For each data seed in ``range(runs)`` a pair of neighboring data loaders is
    built, the empirical sensitivity between them is computed, and the
    differing examples are recorded. A histogram of all sensitivities and the
    neighbor pairs with the largest and smallest sensitivity are then plotted;
    the paths of the saved figures are printed.

    Args:
        n: number of training examples per dataset.
        runs: number of data seeds to evaluate; must be at least 1.
        epochs: number of training epochs per model.
        lr: learning rate.
        batch_size: batch size passed to ``get_data_loaders``.
        model_seed: seed passed to ``compute_empricial_sensivity``.
        lmbda: regularization strength.

    Raises:
        ValueError: if ``runs`` is smaller than 1.
    """
    # With no runs there is nothing to plot and the plotting helpers would
    # fail on empty input, so reject this up front.
    if runs < 1:
        raise ValueError('runs should be at least 1')

    list_of_empirical_sensitivies = []
    list_of_neighboring_examples = []
    for data_seed in range(runs):
        loaders, neighboring_examples = get_data_loaders(data_seed, batch_size,
                num_train=n)
        sensitivity = compute_empricial_sensivity(
                loaders['train'], loaders['neighbor'],
                epochs, lr, lmbda, model_seed)
        # Store plain Python floats: building a np.ndarray directly from torch
        # scalars breaks for tensors that track gradients or live on a GPU.
        list_of_empirical_sensitivies.append(float(sensitivity))
        list_of_neighboring_examples.append(neighboring_examples)

    list_of_empirical_sensitivies = np.array(list_of_empirical_sensitivies)
    name = 'lambda={},n={}'.format(lmbda, n)
    filename = plot_hist(list_of_empirical_sensitivies, n, lmbda, name)
    print('see plot at', filename)

    filenames = plot_extreme_neighbors(list_of_empirical_sensitivies, list_of_neighboring_examples, name)
    print('see plots at {} and {}'.format(*filenames))

arguments and main function call

In [6]:
# Experiment configuration; every value below is passed straight to main().
N = 1000            # number of training examples per dataset
RUNS = 4            # TODO(student): run more times once your code works; something like 100
EPOCHS = 100        # training epochs per model
LR = 0.1            # learning rate
BATCH_SIZE = 256    # batch size for the data loaders
MODEL_SEED = 0      # seed shared by both models of a neighboring pair
LMBDA = 5e-4        # regularization strength

main(
    n=N,
    runs=RUNS,
    epochs=EPOCHS,
    lr=LR,
    batch_size=BATCH_SIZE,
    model_seed=MODEL_SEED,
    lmbda=LMBDA,
)

  0%|          | 0/9912422 [00:00<?, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../../data/MNIST/raw/train-images-idx3-ubyte.gz


9920512it [00:00, 28054893.37it/s]                            


Extracting ../../data/MNIST/raw/train-images-idx3-ubyte.gz


32768it [00:00, 437002.24it/s]
  1%|          | 16384/1648877 [00:00<00:11, 144242.29it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../../data/MNIST/raw/train-labels-idx1-ubyte.gz
Extracting ../../data/MNIST/raw/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../../data/MNIST/raw/t10k-images-idx3-ubyte.gz


1654784it [00:00, 7137850.26it/s]                           
8192it [00:00, 185118.92it/s]


Extracting ../../data/MNIST/raw/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../../data/MNIST/raw/t10k-labels-idx1-ubyte.gz
Extracting ../../data/MNIST/raw/t10k-labels-idx1-ubyte.gz
Processing...
Done!


100%|██████████| 100/100 [00:11<00:00,  8.42it/s]
100%|██████████| 100/100 [00:10<00:00,  9.37it/s]
100%|██████████| 100/100 [00:09<00:00, 10.50it/s]
100%|██████████| 100/100 [00:09<00:00, 10.38it/s]
100%|██████████| 100/100 [00:09<00:00,  9.23it/s]
100%|██████████| 100/100 [00:09<00:00, 10.45it/s]
100%|██████████| 100/100 [00:09<00:00, 10.40it/s]
100%|██████████| 100/100 [00:09<00:00, 10.50it/s]


see plot at ./figs/lambda=0.0005,n=1000.histogram.png
see plots at ./figs/lambda=0.0005,n=1000.max_sensitivity.png and ./figs/lambda=0.0005,n=1000.min_sensitivity.png
