imports

In [1]:
"""Private Training by Output Perturbation."""
import argparse

import numpy as np
from scipy.stats import ortho_group
import torch
from torch.distributions.gamma import Gamma
from torch import nn

from logistic_regression import nonprivate_logistic_regression
from utils import get_data_loaders

helper functions

In [2]:
def gamma_sample_pytorch_parameterization(concentration, rate):
    """Draw one sample from a Gamma distribution in PyTorch's convention.

    PyTorch's ``Gamma`` is parameterized by ``concentration`` and ``rate``;
    both arguments are forwarded to it unchanged.
    """
    distribution = Gamma(concentration=concentration, rate=rate)
    return distribution.sample()


def gamma_sample_chaudhuri_parameterization(concentration, scale):
    """Draw one Gamma sample using the Chaudhuri and Monteleoni convention.

    That convention is expressed with a ``scale`` parameter; it is converted
    to the reciprocal ``rate`` before delegating to the PyTorch-style sampler.
    """
    return gamma_sample_pytorch_parameterization(concentration,
                                                 rate=1. / scale)


def random_unit_norm_vector(num_dims, rng=None):
    """Sample a direction uniformly at random from the unit sphere.

    A vector of i.i.d. standard normal entries is rotationally symmetric, so
    dividing it by its Euclidean norm gives a uniformly distributed unit
    vector. This costs O(num_dims) instead of building a full
    ``num_dims x num_dims`` random orthogonal matrix just to keep one column.

    Args:
        num_dims: dimension of the vector; must be at least 1.
        rng: optional source of randomness exposing ``standard_normal(size)``
            (e.g. ``np.random.default_rng(seed)`` or ``np.random.RandomState``).
            Defaults to NumPy's global random state.

    Returns:
        A 1-D ``torch.float32`` tensor of length ``num_dims`` with unit norm.

    Raises:
        ValueError: if ``num_dims`` is smaller than 1.
    """
    if num_dims < 1:
        raise ValueError(
            'num_dims must be a positive integer, got {}'.format(num_dims))

    source = np.random if rng is None else rng

    # An all-zero draw has probability zero, but resample rather than divide
    # by zero if it ever happens.
    norm = 0.
    while norm == 0.:
        gaussian = source.standard_normal(num_dims)
        norm = np.linalg.norm(gaussian)

    # Normalize in float64 first, then down-cast once for PyTorch.
    return torch.tensor(gaussian / norm, dtype=torch.float32)

your code

In [3]:
def private_logistic_regression(dset_loader, num_epochs, learning_rate,
    lmbda, epsilon, seed=None):
    """Train logistic regression privately via output perturbation.

    The non-private model is trained with ``nonprivate_logistic_regression``
    and a single random noise vector is added to its weights. The noise has a
    direction drawn by ``random_unit_norm_vector`` and a norm drawn from a
    Gamma distribution (Chaudhuri and Monteleoni parameterization) with
    concentration ``d`` and scale ``2 / (n * epsilon * lmbda)``, where ``d`` is
    ``dset_loader.dataset.num_pixels`` and ``n`` is
    ``len(dset_loader.dataset)``.

    Args:
        dset_loader: data loader whose ``dataset`` exposes ``num_pixels`` and
            ``__len__``; forwarded to ``nonprivate_logistic_regression``.
        num_epochs: forwarded to ``nonprivate_logistic_regression``.
        learning_rate: forwarded to ``nonprivate_logistic_regression``.
        lmbda: regularization strength; must be strictly positive.
        epsilon: privacy parameter; must be strictly positive.
        seed: forwarded to ``nonprivate_logistic_regression`` as ``seed``.
            NOTE(review): the noise draw itself is not seeded here; it uses
            whatever random state the sampling helpers rely on.

    Returns:
        A state-dict style ``dict`` with the single key ``'weight'`` holding
        the perturbed weight tensor.

    Raises:
        ValueError: if the dataset is empty, or if ``lmbda`` or ``epsilon`` is
            not strictly positive (the noise scale would be undefined).
    """
    num_pixels = dset_loader.dataset.num_pixels
    num_examples = len(dset_loader.dataset)

    # Fail early with a clear message instead of a ZeroDivisionError or an
    # invalid (non-positive) Gamma rate further down.
    if num_examples < 1:
        raise ValueError('dataset must contain at least one example')
    if not lmbda > 0:
        raise ValueError('lmbda must be > 0, got {}'.format(lmbda))
    if not epsilon > 0:
        raise ValueError('epsilon must be > 0, got {}'.format(epsilon))

    # Scale of the Gamma distribution that the noise norm is drawn from.
    scale = 2. / (num_examples * epsilon * lmbda)

    # Noise = (uniform random direction) * (Gamma-distributed norm).
    noise_direction = random_unit_norm_vector(num_pixels)
    noise_norm = gamma_sample_chaudhuri_parameterization(num_pixels, scale)
    noise = noise_direction * noise_norm

    nonprivate_params = nonprivate_logistic_regression(
        dset_loader, num_epochs, learning_rate, lmbda, seed=seed)

    weight = nonprivate_params['weight']
    # The 1-D noise broadcasts against the weight; keep both on one device.
    private_params = {
        'weight': weight + noise.to(weight.device),
    }
    return private_params

main function

In [4]:
def _evaluate_linear_classifier(params, test_loader, num_pixels):
    """Return (mean test loss, accuracy in [0, 1]) for a bias-free linear model."""
    model = nn.Linear(num_pixels, 1, bias=False)
    criterion = nn.BCEWithLogitsLoss()  # binary cross entropy on logits
    model.load_state_dict(params)
    model.eval()

    num_test_examples = len(test_loader.dataset)
    test_loss = 0.
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            # Flatten each image to the model's input width.
            images = images.reshape(-1, num_pixels)
            # squeeze(-1) drops only the output-feature axis, so a final
            # batch of size 1 stays 1-D and still matches `labels`.
            logits = model(images).squeeze(-1)
            loss = criterion(logits, labels.float())
            # Weight each batch's mean loss by its share of the test set.
            test_loss += loss.item() * len(images) / float(num_test_examples)
            predicted = (logits > 0.).long()
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return test_loss, float(correct) / float(total)


def main(n, epsilon, lmbda, epochs, batch_size, lr, data_seed, model_seed):
    """Train non-private and private models and print their test metrics.

    Args:
        n: third argument passed to ``get_data_loaders``.
        epsilon: privacy parameter passed to ``private_logistic_regression``.
        lmbda: regularization strength passed to both training routines.
        epochs: number of training epochs passed to both training routines.
        batch_size: second argument passed to ``get_data_loaders``.
        lr: learning rate passed to both training routines.
        data_seed: first argument passed to ``get_data_loaders``.
        model_seed: passed as ``seed`` to both training routines.

    Returns:
        None. Test losses and accuracies (as percentages) are printed.
    """
    # load data
    loaders, _ = get_data_loaders(data_seed, batch_size, n)
    loaders.pop('neighbor')  # don't need this loader for this question
    train_loader = loaders['train']

    # train model
    nonprivate_params = nonprivate_logistic_regression(
        train_loader, epochs, lr, lmbda, seed=model_seed)
    private_params = private_logistic_regression(
        train_loader, epochs, lr, lmbda, epsilon, seed=model_seed)

    # evaluate
    num_pixels = train_loader.dataset.num_pixels
    test_losses = dict()
    test_accs = dict()
    for name, params in (('nonprivate', nonprivate_params),
                         ('private', private_params)):
        test_loss, test_acc = _evaluate_linear_classifier(
            params, loaders['test'], num_pixels)
        test_losses[name] = test_loss
        test_accs[name] = 100. * test_acc  # format as a percentage

    print('final test losses')
    print('nonprivate: {nonprivate:.2f}, private: {private:.2f}'
          .format(**test_losses))
    print('final test accs')
    print('nonprivate: {nonprivate:.2f}, private: {private:.2f}'
          .format(**test_accs))

arguments and main function call

In [5]:
# Settings for this run; each value is handed to main() by keyword below.
N = 1000
EPSILON = 4.0
LMBDA = 0.05
EPOCHS = 100  # raise this for a longer training run
BATCH_SIZE = 256
LR = 0.1
DATA_SEED = 0
MODEL_SEED = 0

main(n=N, epsilon=EPSILON, lmbda=LMBDA, epochs=EPOCHS,
     batch_size=BATCH_SIZE, lr=LR, data_seed=DATA_SEED,
     model_seed=MODEL_SEED)

100%|██████████| 100/100 [00:16<00:00,  6.44it/s]
100%|██████████| 100/100 [00:16<00:00,  6.56it/s]


final test losses
nonprivate: 0.44, private: 0.44
final test accs
nonprivate: 96.00, private: 95.00


 Question answer: Start by answering the following True/False propositions:
1. False
2. False
3. False
Because of the scale formula, scale = 2 / (n · ε · λ): when we increase λ or ε, the scale becomes smaller, so the sampled noise_norm (and therefore the noise added to the weights) is smaller.

In [6]:
# Settings for this run (N = 100); each value is handed to main() by keyword.
N = 100
EPSILON = 4.0
LMBDA = 0.05
EPOCHS = 100  # raise this for a longer training run
BATCH_SIZE = 256
LR = 0.1
DATA_SEED = 0
MODEL_SEED = 0

main(n=N, epsilon=EPSILON, lmbda=LMBDA, epochs=EPOCHS,
     batch_size=BATCH_SIZE, lr=LR, data_seed=DATA_SEED,
     model_seed=MODEL_SEED)

100%|██████████| 100/100 [00:01<00:00, 82.64it/s]
100%|██████████| 100/100 [00:01<00:00, 79.94it/s]

final test losses
nonprivate: 0.40, private: 0.86
final test accs
nonprivate: 100.00, private: 70.00





In [7]:
# Settings for this run (N = 100, 1000 epochs, learning rate 1e-4); each value
# is handed to main() by keyword.
N = 100
EPSILON = 4.0
LMBDA = 0.05
EPOCHS = 1000
BATCH_SIZE = 256
LR = 1e-4
DATA_SEED = 0
MODEL_SEED = 0

main(n=N, epsilon=EPSILON, lmbda=LMBDA, epochs=EPOCHS,
     batch_size=BATCH_SIZE, lr=LR, data_seed=DATA_SEED,
     model_seed=MODEL_SEED)

100%|██████████| 1000/1000 [00:11<00:00, 87.28it/s]
100%|██████████| 1000/1000 [00:12<00:00, 81.98it/s]

final test losses
nonprivate: 0.67, private: 0.75
final test accs
nonprivate: 90.00, private: 70.00



