import statements

In [1]:
"""Empirical Sensitivity."""
import argparse
import os

import numpy as np
import torch
from torch import nn
from utils import get_data_loaders
import torchvision.transforms as transforms
from logistic_regression import nonprivate_logistic_regression 

your code

In [2]:
def plot_hist(array_of_empirical_sensitivities, n, lmbda, name):
    """Write a histogram of empirical sensitivities to ./figs and return its path.

    The histogram uses 20 bins on a log-scaled x axis. A dashed red vertical
    line marks the theoretical maximum sensitivity, computed as 2 / (n * lmbda).

    Args:
        array_of_empirical_sensitivities: np.ndarray of sensitivities to bin.
        n: the ``n`` in the bound 2 / (n * lmbda).
        lmbda: the lambda in the bound 2 / (n * lmbda).
        name: str used in the plot title and as the stem of the output file.

    Returns:
        The path of the saved figure, ``./figs/<name>.histogram.png``.

    Raises:
        ValueError: if ``array_of_empirical_sensitivities`` is not a
            np.ndarray or ``name`` is not a str.
    """
    if not isinstance(array_of_empirical_sensitivities, np.ndarray):
        raise ValueError('array_of_empirical_sensitivities should be a np.ndarray.')
    if not isinstance(name, str):
        raise ValueError('name should be a str')

    # Imported lazily, after validation, and switched to the Agg backend so
    # the figure is only rendered to a file.
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    max_theoretical_sensitivity = 2/(n*lmbda)

    num_bins = 20
    dirname = './figs'
    filename = os.path.join(dirname, name) + '.histogram.png'
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(dirname, exist_ok=True)

    fig, ax = plt.subplots()
    try:
        ax.set_xscale('log')
        ax.hist(array_of_empirical_sensitivities,
                num_bins, label='empirical sensitivities')
        ax.set_title('histogram of sensitivities: ' + name)
        ax.axvline(x=max_theoretical_sensitivity, color='r', linestyle='dashed', linewidth=2,
                label='theoretical max sensitivity')
        ax.legend()
        fig.savefig(filename)
    finally:
        # pyplot keeps every figure alive until it is closed; this function is
        # called once per experiment, so release the figure explicitly.
        plt.close(fig)
    return filename


def plot_extreme_neighbors(sensitivities, list_of_neighboring_examples, name):
    """Plots to disk the neighboring-example pairs with the most and least empirical sensitivity

    Two figures are written under ./figs, each showing the two images of one
    neighboring pair side by side:
        ./figs/<name>.maxisen.png  -- pair at np.argmax(sensitivities)
        ./figs/<name>.minsen.png   -- pair at np.argmin(sensitivities)

    Note on the data structures used:
        sensitivities: a np.ndarray containing empirical sensitivities for each run
        list_of_neighboring_examples: a list of neighboring example pairs, one for each run. in other words:

        list_of_neighboring_examples = [
            neighboring_example_1,
            neighboring_example_2,
            ...
            neighboring_example_n,
            ]

        where each tuple in the list represents the data diff between the neighboring
        datasets and is formatted like this:

        neighboring_example_i = (
            (neighbor_img_i, neighbor_label_i),
            (neighbor_img_i_prime, neighbor_label_i_prime),
        )

    Args:
        sensitivities: np.ndarray with one entry per neighboring pair.
        list_of_neighboring_examples: non-empty list of pairs as described above;
            the images must be torch.Tensor objects accepted by
            torchvision's ToPILImage.
        name: str used as the stem of both output files.

    Returns:
        A tuple ``(max_sensitivity_filename, min_sensitivity_filename)``.

    Raises:
        ValueError: if an argument has the wrong type, the list is empty, or
            the number of sensitivities differs from the number of pairs.
    """
    if not isinstance(sensitivities, np.ndarray):
        raise ValueError('sensitivies should be a np.ndarray.')
    # Check the container before indexing it, so an empty or non-list argument
    # produces the intended ValueError rather than an IndexError/TypeError.
    if not isinstance(list_of_neighboring_examples, list):
        raise ValueError('list_of_neighboring_examples should be a list of tuple pairs, where tuple contains img tensors')
    if not list_of_neighboring_examples:
        raise ValueError('list_of_neighboring_examples should not be empty')
    first_neighbor_pair = list_of_neighboring_examples[0]
    if not isinstance(first_neighbor_pair, tuple) \
            or not isinstance(first_neighbor_pair[0][0], torch.Tensor):
        raise ValueError('list_of_neighboring_examples should be a list of tuple pairs, where tuple contains img tensors')
    if not isinstance(name, str):
        raise ValueError('name should be a str')
    # argmax/argmin index into the list, so the two must line up one-to-one.
    if sensitivities.size != len(list_of_neighboring_examples):
        raise ValueError('sensitivities and list_of_neighboring_examples should have the same length')

    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    dirname = './figs'
    max_filename = os.path.join(dirname, name) + '.maxisen.png'
    min_filename = os.path.join(dirname, name) + '.minsen.png'
    os.makedirs(dirname, exist_ok=True)

    to_pil = transforms.ToPILImage()
    targets = (
        (np.argmax(sensitivities), max_filename),
        (np.argmin(sensitivities), min_filename),
    )
    for pair_index, filename in targets:
        # Labels are part of each example but are not drawn.
        (image, _), (image_prime, _) = list_of_neighboring_examples[pair_index]
        figure, axes = plt.subplots(1, 2)
        try:
            axes[0].imshow(to_pil(image))
            axes[1].imshow(to_pil(image_prime))
            figure.savefig(filename)
        finally:
            # Release the figure; pyplot otherwise keeps it alive.
            plt.close(figure)

    return max_filename, min_filename


def compute_empricial_sensivity(train_loader, neighbor_loader,
        num_epochs, learning_rate, lmbda, model_seed=None):
    """Return the 2-norm of the weight difference between two trained models.

    nonprivate_logistic_regression is run once on ``train_loader`` and then
    once on ``neighbor_loader`` with identical hyperparameters and seed. The
    'weight' entry of each result is taken and the norm of their difference
    is computed with ``torch.norm(..., p=2)``.

    Args:
        train_loader: data loader for the original dataset.
        neighbor_loader: data loader for the neighboring dataset.
        num_epochs, learning_rate, lmbda, model_seed: forwarded unchanged, in
            this order, to nonprivate_logistic_regression.

    Returns:
        The value returned by ``torch.norm`` for the weight difference.
    """
    # Train on the original data first, then on the neighbor, as two
    # independent runs sharing every other argument.
    trained_weights = [
        nonprivate_logistic_regression(
            loader, num_epochs, learning_rate, lmbda, model_seed)['weight']
        for loader in (train_loader, neighbor_loader)
    ]
    weight_gap = trained_weights[0] - trained_weights[1]
    return torch.norm(weight_gap, p=2)

main function

In [3]:
def main(n, runs, epochs, lr, batch_size, model_seed, lmbda):
    """Run the empirical-sensitivity experiment and write its plots to disk.

    For each data seed in ``range(runs)`` a pair of neighboring data loaders
    is built, the empirical sensitivity between them is computed, and the
    neighboring examples are recorded. Afterwards a histogram of all
    sensitivities and the most/least sensitive neighbor pairs are plotted,
    and the resulting file paths are printed.

    Args:
        n: passed to get_data_loaders as ``num_train`` and to plot_hist.
        runs: number of data seeds to evaluate; must be at least 1.
        epochs: number of epochs forwarded to compute_empricial_sensivity.
        lr: learning rate forwarded to compute_empricial_sensivity.
        batch_size: batch size passed to get_data_loaders.
        model_seed: seed forwarded to compute_empricial_sensivity.
        lmbda: lambda forwarded to compute_empricial_sensivity and plot_hist.

    Raises:
        ValueError: if ``runs`` is smaller than 1.
    """
    # With no runs there is nothing to plot; fail here instead of deep inside
    # the plotting code.
    if runs < 1:
        raise ValueError('runs should be at least 1')

    list_of_empirical_sensitivies = []
    list_of_neighboring_examples = []
    for data_seed in range(runs):
        loaders, neighboring_examples = get_data_loaders(data_seed, batch_size,
                num_train=n)
        sensitivity = compute_empricial_sensivity(
                loaders['train'], loaders['neighbor'],
                epochs, lr, lmbda, model_seed)
        # Store plain Python floats so building the NumPy array below does not
        # depend on how the returned scalar converts to an array element.
        list_of_empirical_sensitivies.append(float(sensitivity))
        list_of_neighboring_examples.append(neighboring_examples)

    list_of_empirical_sensitivies = np.array(list_of_empirical_sensitivies)
    name = 'lambda={},n={}'.format(lmbda, n)
    filename = plot_hist(list_of_empirical_sensitivies, n, lmbda, name)
    print('see plot at', filename)

    filenames = plot_extreme_neighbors(list_of_empirical_sensitivies, list_of_neighboring_examples, name)
    print('see plots at {} and {}'.format(*filenames))

arguments and main function call

In [4]:
# Experiment settings for this cell; all of them are passed positionally to main().
N = 1000
RUNS = 100  # number of data seeds main() iterates over
EPOCHS = 100
LR = 0.1
BATCH_SIZE = 256
MODEL_SEED = 0
LMBDA = 5e-4  # lambda; plot_hist draws the bound 2/(n*lambda) from it

main(N, RUNS, EPOCHS, LR, BATCH_SIZE, MODEL_SEED, LMBDA)

100%|██████████| 100/100 [00:10<00:00,  9.94it/s]
100%|██████████| 100/100 [00:10<00:00,  9.32it/s]
100%|██████████| 100/100 [00:11<00:00,  8.77it/s]
100%|██████████| 100/100 [00:10<00:00,  8.22it/s]
100%|██████████| 100/100 [00:11<00:00,  9.41it/s]
100%|██████████| 100/100 [00:10<00:00,  9.66it/s]
100%|██████████| 100/100 [00:10<00:00,  9.37it/s]
100%|██████████| 100/100 [00:10<00:00,  9.62it/s]
100%|██████████| 100/100 [00:10<00:00,  9.44it/s]
100%|██████████| 100/100 [00:11<00:00,  6.73it/s]
100%|██████████| 100/100 [00:17<00:00,  3.08it/s]
100%|██████████| 100/100 [00:27<00:00,  5.96it/s]
100%|██████████| 100/100 [00:13<00:00,  9.50it/s]
100%|██████████| 100/100 [00:10<00:00,  9.49it/s]
100%|██████████| 100/100 [00:10<00:00,  9.38it/s]
100%|██████████| 100/100 [00:11<00:00,  6.55it/s]
100%|██████████| 100/100 [00:11<00:00,  9.02it/s]
100%|██████████| 100/100 [00:10<00:00,  9.32it/s]
100%|██████████| 100/100 [00:10<00:00,  9.18it/s]
100%|██████████| 100/100 [00:10<00:00,  9.10it/s]


see plot at ./figs/lambda=0.0005,n=1000.histogram.png
see plots at ./figs/lambda=0.0005,n=1000.maxisen.png and ./figs/lambda=0.0005,n=1000.minsen.png


In [5]:
# Same experiment as the previous cell, but with a larger lambda and fewer runs.
N = 1000
RUNS = 3  # TODO(student): run more times once your code works; something like 100
EPOCHS = 100
LR = 0.1
BATCH_SIZE = 256
MODEL_SEED = 0
LMBDA = 5e-1  # lambda; plot_hist draws the bound 2/(n*lambda) from it

main(N, RUNS, EPOCHS, LR, BATCH_SIZE, MODEL_SEED, LMBDA)

100%|██████████| 100/100 [00:11<00:00,  9.37it/s]
100%|██████████| 100/100 [00:10<00:00,  9.46it/s]
100%|██████████| 100/100 [00:10<00:00,  9.55it/s]
100%|██████████| 100/100 [00:10<00:00,  9.23it/s]
100%|██████████| 100/100 [00:10<00:00,  8.72it/s]
100%|██████████| 100/100 [00:11<00:00,  8.90it/s]


see plot at ./figs/lambda=0.5,n=1000.histogram.png
see plots at ./figs/lambda=0.5,n=1000.maxisen.png and ./figs/lambda=0.5,n=1000.minsen.png


In [7]:
# Same experiment again, this time with a very small lambda.
N = 1000
RUNS = 5  # TODO(student): run more times once your code works; something like 100
EPOCHS = 100
LR = 0.1
BATCH_SIZE = 256
MODEL_SEED = 0
LMBDA = 5e-9  # lambda; plot_hist draws the bound 2/(n*lambda) from it

main(N, RUNS, EPOCHS, LR, BATCH_SIZE, MODEL_SEED, LMBDA)

100%|██████████| 100/100 [00:10<00:00,  9.69it/s]
100%|██████████| 100/100 [00:10<00:00,  9.92it/s]
100%|██████████| 100/100 [00:10<00:00, 10.08it/s]
100%|██████████| 100/100 [00:10<00:00,  7.96it/s]
100%|██████████| 100/100 [00:10<00:00,  9.10it/s]
100%|██████████| 100/100 [00:11<00:00,  9.83it/s]
100%|██████████| 100/100 [00:10<00:00,  8.33it/s]
100%|██████████| 100/100 [00:10<00:00,  8.84it/s]
100%|██████████| 100/100 [00:10<00:00,  9.82it/s]
100%|██████████| 100/100 [00:10<00:00,  8.40it/s]


see plot at ./figs/lambda=5e-09,n=1000.histogram.png
see plots at ./figs/lambda=5e-09,n=1000.maxisen.png and ./figs/lambda=5e-09,n=1000.minsen.png


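## Bonus: Describe how the histogram changes in response to changes in λ. What about in response to changes in n? When we increase the value of λ, the theoretical max sensitivity decreases and lies very close to the empirical sensitivities. When we decrease the value of λ, the theoretical max sensitivity increases and moves away from the empirical sensitivities. Because the theoretical bound used here is 2/(nλ), increasing n lowers the theoretical max sensitivity in the same way that increasing λ does, and decreasing n raises it.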