import statements

In [1]:
"""Private Model Selection."""
import argparse
from glob import glob
import os
import random

import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

from utils import get_data_loaders
import warnings
warnings.filterwarnings("ignore")

In [2]:
from scipy.misc import logsumexp

helper code

In [3]:
def plot_probs(ndarray_of_probs, name):
    if not isinstance(ndarray_of_probs, np.ndarray):
        msg = 'ndarray_of_probs should be a np.ndarray. ' + \
                'Make sure to convert from torch.tensor if need be.'
        raise ValueError(msg)
    if not isinstance(name, str):
        raise ValueError('name should be a str')

    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    dirname = './figs'
    filename = os.path.join(dirname, name) + '.model-selection-probs.png'
    if not os.path.exists(dirname):
        os.makedirs(dirname)
    fig, ax = plt.subplots()
    model_idxs = np.arange(len(ndarray_of_probs))
    ax.bar(model_idxs, ndarray_of_probs)
    ax.set_xlabel('model idx')
    ax.set_ylabel('prob of being selected under Exp Mech')
    ax.set_title(name)
    ax.set_xticks(model_idxs)
    fig.savefig(filename)
    return filename


def load_models(num_pixels):
    """Randomly samples k pre-trained models parameters (from the list of ten)
    """
    list_of_model_filenames = glob('./pretrained_models/*.pt')
    list_of_model_filenames.sort()
    list_of_models = []
    for model_filename in list_of_model_filenames:
        model = nn.Linear(num_pixels, 1, bias=False)
        model.load_state_dict(torch.load(model_filename))
        list_of_models.append(model)
    return list_of_models

your code

In [4]:
def compute_scores(list_of_models, test_loader):
    """Compute score (performance on private test data) for each model"""
    if not isinstance(list_of_models, list):
        raise ValueError('first argument should be a list')
    if not isinstance(test_loader, DataLoader):
        raise ValueError('second argument should be pytorch data loader')

    ############################################################################
    # TODO(student)
    #
    # your code here...
    
    criterion= nn.BCEWithLogitsLoss()
    num_test_examples= len(test_loader.dataset)
    liste= np.zeros(len(list_of_models))
    k=0
    for model in list_of_models:
        
        losses=0
        #loss=0
        for i, (images, labels) in enumerate(test_loader):
            images = images.reshape(-1, 28*28)
            outputs= model(images)
            loss= criterion(outputs.squeeze(), labels.float())
            losses += loss * (len(images) / float(num_test_examples))
            
        liste[k]=1.0 - losses
        k+=1
            
    return liste

    
    
def exponential_mechanism(list_of_models, test_loader, epsilon):
    """Sample from model list, where sampling probability scales with test score
    
    Return both the sampled model and the sample index
    """
    if not isinstance(list_of_models, list):
        raise ValueError('first argument should be a list')
    if not isinstance(test_loader, DataLoader):
        raise ValueError('second argument should be pytorch data loader')

    scores = compute_scores(list_of_models, test_loader)
    num_test_examples = len(test_loader.dataset)

    ############################################################################
    # TODO(student)
    #
    # your code here..

    #sample_probs= np.zeros(len(list_of_models))
    #scores= np.array(scores)
    sensibility = 2/(num_test_examples)
    num = (epsilon*scores)/sensibility
    som = logsumexp((epsilon*scores)/sensibility)
    
    sample_probs = np.array(np.exp(num - som))  
    
    Proba = np.random.choice(sample_probs)
    sampled_idx= np.where(sample_probs == Proba)[0][0]
    
    sampled_model= list_of_models[sampled_idx]
    
    # hint: you're exponential mechanism should somehow depend on the number of
    # training data in test loader
    #
    #raise NotImplementedError
    ############################################################################

    return sampled_model, sampled_idx, sample_probs

## Comment: 

main function

In [5]:
BATCH_SIZE = 250
SEED = 3771

def main(n, epsilon):
    loaders, _ = get_data_loaders(SEED, BATCH_SIZE, 
            num_train=13006, num_test=n)
    num_pixels = loaders['train'].dataset.num_pixels
    models = load_models(num_pixels)

    private_best_model, private_best_model_idx, sample_probs \
            = exponential_mechanism(models, loaders['test'], epsilon)

    print('selected model', private_best_model_idx)
    name = 'eps={},n={}'.format(epsilon, n)
    filename = plot_probs(sample_probs, name)
    print('see plot at', filename)

arguments and main function call

In [6]:
N = [2,10,100,1000]
EPSILON = [1,2,4]

for j in N:
    for i in EPSILON:
        main(j,i)
        #print(type(j),type(i))
    
# TODO(student): sweep over the required values for N and EPSILON and produce 
#                several plots


selected model 9
see plot at ./figs/eps=1,n=2.model-selection-probs.png
selected model 8
see plot at ./figs/eps=2,n=2.model-selection-probs.png
selected model 2
see plot at ./figs/eps=4,n=2.model-selection-probs.png
selected model 7
see plot at ./figs/eps=1,n=10.model-selection-probs.png
selected model 8
see plot at ./figs/eps=2,n=10.model-selection-probs.png
selected model 9
see plot at ./figs/eps=4,n=10.model-selection-probs.png
selected model 1
see plot at ./figs/eps=1,n=100.model-selection-probs.png
selected model 4
see plot at ./figs/eps=2,n=100.model-selection-probs.png
selected model 4
see plot at ./figs/eps=4,n=100.model-selection-probs.png
selected model 5
see plot at ./figs/eps=1,n=1000.model-selection-probs.png
selected model 9
see plot at ./figs/eps=2,n=1000.model-selection-probs.png
selected model 2
see plot at ./figs/eps=4,n=1000.model-selection-probs.png


What score function should we use to compare models on the test set? The score function that we should use to comapre the test set is: 1-loss, because it depend of how good output is for the database.