imports

In [1]:
"""Private Training by Output Perturbation."""
import argparse

import numpy as np
from scipy.stats import ortho_group
import torch
from torch.distributions.gamma import Gamma
from torch import nn

from logistic_regression import nonprivate_logistic_regression
from utils import get_data_loaders

helper functions

In [2]:
def gamma_sample_pytorch_parameterization(concentration, rate):
    """Draw a single Gamma sample using PyTorch's own parameterization.

    Args:
        concentration: shape parameter, passed straight through to
            ``torch.distributions.gamma.Gamma``.
        rate: rate (inverse scale) parameter, passed straight through as well.

    Returns:
        Whatever ``Gamma(concentration, rate).sample()`` returns.
    """
    distribution = Gamma(concentration, rate)
    return distribution.sample()


def gamma_sample_chaudhuri_parameterization(concentration, scale):
    """Draw a single Gamma sample given a (concentration, scale) pair.

    Chaudhuri and Monteleoni describe the Gamma distribution by its scale,
    while the PyTorch-style sampler in this file expects a rate; the rate is
    the reciprocal of the scale, so the conversion happens here.
    """
    return gamma_sample_pytorch_parameterization(concentration, 1. / scale)


def random_unit_norm_vector(num_dims):
    """Sample a direction uniformly at random from the unit sphere.

    A standard normal vector is rotationally symmetric, so dividing it by its
    Euclidean norm yields a uniformly distributed unit vector. This is the
    same distribution as the first column of a uniformly random orthogonal
    matrix, but it avoids building a full ``num_dims x num_dims`` matrix just
    to keep one column, and it also works for ``num_dims == 1``.

    Args:
        num_dims: positive integer dimension of the vector.

    Returns:
        A float32 ``torch.Tensor`` of shape ``(num_dims,)`` with unit L2 norm.

    Raises:
        ValueError: if ``num_dims`` is not a positive integer.
    """
    if num_dims < 1 or num_dims != int(num_dims):
        raise ValueError(
            'num_dims must be a positive integer, got {!r}'.format(num_dims))
    num_dims = int(num_dims)

    # An exactly-zero draw has probability zero in theory; resample rather
    # than divide by zero if floating point ever produces one.
    norm = 0.
    while norm == 0.:
        gaussian = np.random.standard_normal(num_dims)
        norm = np.linalg.norm(gaussian)
    return torch.tensor(gaussian / norm, dtype=torch.float32)

# True/False propositions

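With $\lambda$ and $\epsilon$ fixed, increasing $n$ requires Algorithm 1 to add more noise to $w^*$: False. The norm of the noise is drawn from a Gamma distribution with scale $2/(n\epsilon\lambda)$, so a larger $n$ shrinks the noise.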

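With $\lambda$ and $n$ fixed, increasing $\epsilon$ requires Algorithm 1 to add more noise to $w^*$: False. The norm of the noise is drawn from a Gamma distribution with scale $2/(n\epsilon\lambda)$, so a larger $\epsilon$ (a weaker privacy guarantee) shrinks the noise.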

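With $\epsilon$ and $n$ fixed, increasing $\lambda$ requires Algorithm 1 to add more noise to $w^*$: False. The norm of the noise is drawn from a Gamma distribution with scale $2/(n\epsilon\lambda)$, so stronger regularization shrinks the noise.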

your code

In [3]:
def private_logistic_regression(dset_loader, num_epochs, learning_rate,
    lmbda, epsilon, seed=None):
    """Train logistic regression privately by output perturbation.

    The regularized non-private model is trained first. A noise vector is then
    added to its weights: the direction is uniform on the unit sphere and the
    norm is drawn from a Gamma distribution with concentration ``d`` and scale
    ``2 / (n * epsilon * lmbda)``, where ``d`` is the input dimension and ``n``
    is the number of training examples (Algorithm 1 of Chaudhuri and
    Monteleoni).

    Args:
        dset_loader: training data loader; its ``dataset`` must support
            ``len()`` and expose the input dimension as ``num_pixels``.
        num_epochs: forwarded to ``nonprivate_logistic_regression``.
        learning_rate: forwarded to ``nonprivate_logistic_regression``.
        lmbda: regularization strength; must be positive.
        epsilon: privacy parameter; must be positive.
        seed: forwarded to ``nonprivate_logistic_regression``. The noise drawn
            here is not seeded by this function.

    Returns:
        A dict ``{'weight': tensor}`` in state_dict format, holding the
        perturbed weights.

    Raises:
        ValueError: if ``epsilon`` or ``lmbda`` is not positive, or the
            training set is empty (the noise scale would be undefined).
    """
    if epsilon <= 0 or lmbda <= 0:
        raise ValueError(
            'epsilon and lmbda must be positive, got epsilon={!r}, lmbda={!r}'
            .format(epsilon, lmbda))
    num_examples = len(dset_loader.dataset)
    if num_examples == 0:
        raise ValueError('cannot calibrate noise for an empty training set')
    num_dims = dset_loader.dataset.num_pixels

    nonprivate_params = nonprivate_logistic_regression(
        dset_loader, num_epochs, learning_rate, lmbda, seed=seed)

    # Noise norm ~ Gamma(d, scale=2/(n*epsilon*lmbda)); direction ~ uniform on
    # the unit sphere.
    noise_scale = 2.0 / (num_examples * epsilon * lmbda)
    noise_norm = gamma_sample_chaudhuri_parameterization(num_dims, noise_scale)
    noise_direction = random_unit_norm_vector(num_dims)
    noise = noise_norm * noise_direction

    # Both operands are already tensors, so add them directly instead of
    # re-wrapping with torch.tensor(), which copy-constructs and warns.
    # The (num_dims,) noise broadcasts against the trained weight.
    private_weight = (nonprivate_params['weight'] + noise).detach()
    return {'weight': private_weight}

main function

In [4]:
def main(n, epsilon, lmbda, epochs, batch_size, lr, data_seed, model_seed):
    """Train a non-private and a private model and print their test metrics.

    Args:
        n: forwarded to ``get_data_loaders`` as its third argument.
        epsilon: privacy parameter for ``private_logistic_regression``.
        lmbda: regularization strength used by both training routines.
        epochs: number of training epochs for both routines.
        batch_size: forwarded to ``get_data_loaders``.
        lr: learning rate for both routines.
        data_seed: forwarded to ``get_data_loaders``.
        model_seed: passed as ``seed`` to both training routines.

    Prints the test loss and the test accuracy (as a percentage) of each
    model; returns nothing.
    """
    # load data
    loaders, _ = get_data_loaders(data_seed, batch_size, n)
    loaders.pop('neighbor')  # don't need this loader for this question
    train_loader = loaders['train']
    test_loader = loaders['test']

    # train models
    nonprivate_params = nonprivate_logistic_regression(
        train_loader, epochs, lr, lmbda, seed=model_seed)
    private_params = private_logistic_regression(
        train_loader, epochs, lr, lmbda, epsilon, seed=model_seed)

    # evaluate -- everything that does not depend on the model is set up once
    num_pixels = train_loader.dataset.num_pixels
    num_test_examples = len(test_loader.dataset)
    criterion = nn.BCEWithLogitsLoss()  # binary cross entropy
    test_losses = dict()
    test_accs = dict()
    for name, params in (('nonprivate', nonprivate_params),
                         ('private', private_params)):
        model = nn.Linear(num_pixels, 1, bias=False)
        model.load_state_dict(params)
        model.eval()
        test_loss = 0.
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in test_loader:
                # Flatten using the dataset's own dimension rather than a
                # hard-coded 28*28.
                images = images.reshape(-1, num_pixels)
                # Squeeze only the output-feature axis: a bare squeeze() would
                # also drop the batch axis when the last batch has one example
                # and make the loss reject the shape mismatch.
                logits = model(images).squeeze(1)
                loss = criterion(logits, labels.float())
                # The criterion averages within a batch, so weight each batch
                # by its share of the test set.
                test_loss += loss.item() * len(images) / float(num_test_examples)
                predicted = (logits > 0.).long()
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        test_acc = float(correct) / float(total)
        test_losses[name] = test_loss
        test_accs[name] = 100. * test_acc  # format as a percentage

    print('final test losses')
    print('nonprivate: {nonprivate:.2f}, private: {private:.2f}'
          .format(**test_losses))
    print('final test accs')
    print('nonprivate: {nonprivate:.2f}, private: {private:.2f}'
          .format(**test_accs))

In [8]:
# Configuration for a single training/evaluation run.
N = 2000
EPSILON = 2.0
LMBDA = 5e-4
EPOCHS = 50  # run for more epochs once your code works
BATCH_SIZE = 256
LR = 0.1
DATA_SEED = 0
MODEL_SEED = 0
main(
    n=N,
    epsilon=EPSILON,
    lmbda=LMBDA,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    lr=LR,
    data_seed=DATA_SEED,
    model_seed=MODEL_SEED,
)

100%|██████████| 50/50 [00:09<00:00,  5.07it/s]
100%|██████████| 50/50 [00:10<00:00,  4.80it/s]


final test losses
nonprivate: 0.09, private: 3.08
final test accs
nonprivate: 97.50, private: 67.00




arguments and main function call

In [17]:
# Sweep over regularization strengths and training-set sizes at a fixed
# epsilon. The swept values get their own names so that N is not rebound from
# an int to a list between cells.
EPSILON = 2.
EPOCHS = 10  # run for more epochs once your code works
BATCH_SIZE = 256
LR = .1
DATA_SEED = 0
MODEL_SEED = 0
N_VALUES = [1000, 2000, 3000, 5000]
LMBDA_VALUES = [5e-1, 5e-2, 5e-3, 5e-4]
for lmbda_value in LMBDA_VALUES:
    print('===========================   lambda = {} ==========================='
          .format(lmbda_value))
    for num_examples in N_VALUES:
        print('===========================   N = {} ==========================='
              .format(num_examples))
        main(num_examples, EPSILON, lmbda_value, EPOCHS, BATCH_SIZE, LR,
             DATA_SEED, MODEL_SEED)
    print('=================================================================')

 10%|█         | 1/10 [00:00<00:01,  8.29it/s]



100%|██████████| 10/10 [00:01<00:00,  9.77it/s]
100%|██████████| 10/10 [00:01<00:00,  9.92it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

final test losses
nonprivate: 0.64, private: 0.64
final test accs
nonprivate: 96.00, private: 87.00


100%|██████████| 10/10 [00:01<00:00,  5.03it/s]
100%|██████████| 10/10 [00:01<00:00,  4.99it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

final test losses
nonprivate: 0.65, private: 0.65
final test accs
nonprivate: 95.00, private: 93.50


100%|██████████| 10/10 [00:03<00:00,  3.29it/s]
100%|██████████| 10/10 [00:03<00:00,  3.12it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

final test losses
nonprivate: 0.65, private: 0.64
final test accs
nonprivate: 93.67, private: 95.33


100%|██████████| 10/10 [00:05<00:00,  2.00it/s]
100%|██████████| 10/10 [00:05<00:00,  1.96it/s]
 10%|█         | 1/10 [00:00<00:01,  8.41it/s]

final test losses
nonprivate: 0.64, private: 0.64
final test accs
nonprivate: 92.80, private: 93.20


100%|██████████| 10/10 [00:01<00:00,  9.44it/s]
100%|██████████| 10/10 [00:01<00:00,  9.13it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

final test losses
nonprivate: 0.42, private: 0.44
final test accs
nonprivate: 96.00, private: 89.00


100%|██████████| 10/10 [00:02<00:00,  4.92it/s]
100%|██████████| 10/10 [00:01<00:00,  5.03it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

final test losses
nonprivate: 0.43, private: 0.47
final test accs
nonprivate: 95.50, private: 82.00


100%|██████████| 10/10 [00:02<00:00,  3.37it/s]
100%|██████████| 10/10 [00:03<00:00,  3.28it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

final test losses
nonprivate: 0.43, private: 0.44
final test accs
nonprivate: 93.67, private: 91.67


100%|██████████| 10/10 [00:04<00:00,  2.00it/s]
100%|██████████| 10/10 [00:04<00:00,  2.07it/s]
 10%|█         | 1/10 [00:00<00:00,  9.26it/s]

final test losses
nonprivate: 0.43, private: 0.43
final test accs
nonprivate: 94.60, private: 94.60


100%|██████████| 10/10 [00:00<00:00,  9.99it/s]
100%|██████████| 10/10 [00:00<00:00, 10.01it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

final test losses
nonprivate: 0.32, private: 4.11
final test accs
nonprivate: 96.00, private: 34.00


100%|██████████| 10/10 [00:02<00:00,  4.92it/s]
100%|██████████| 10/10 [00:02<00:00,  4.90it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

final test losses
nonprivate: 0.22, private: 1.09
final test accs
nonprivate: 97.00, private: 52.50


100%|██████████| 10/10 [00:03<00:00,  3.34it/s]
100%|██████████| 10/10 [00:02<00:00,  3.37it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

final test losses
nonprivate: 0.20, private: 0.26
final test accs
nonprivate: 95.33, private: 88.00


100%|██████████| 10/10 [00:04<00:00,  2.01it/s]
100%|██████████| 10/10 [00:04<00:00,  2.01it/s]
 10%|█         | 1/10 [00:00<00:00,  9.55it/s]

final test losses
nonprivate: 0.19, private: 0.13
final test accs
nonprivate: 95.80, private: 96.20


100%|██████████| 10/10 [00:01<00:00,  9.75it/s]
100%|██████████| 10/10 [00:01<00:00,  9.86it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

final test losses
nonprivate: 0.31, private: 5.09
final test accs
nonprivate: 96.00, private: 79.00


100%|██████████| 10/10 [00:02<00:00,  4.98it/s]
100%|██████████| 10/10 [00:01<00:00,  5.03it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

final test losses
nonprivate: 0.19, private: 5.90
final test accs
nonprivate: 97.00, private: 60.50


100%|██████████| 10/10 [00:03<00:00,  3.32it/s]
100%|██████████| 10/10 [00:02<00:00,  3.34it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

final test losses
nonprivate: 0.17, private: 6.18
final test accs
nonprivate: 95.33, private: 44.67


100%|██████████| 10/10 [00:05<00:00,  1.96it/s]
100%|██████████| 10/10 [00:05<00:00,  1.91it/s]


final test losses
nonprivate: 0.13, private: 1.56
final test accs
nonprivate: 96.00, private: 66.20
