In [129]:
import os
import numpy as np
import IPython
import copy
from shutil import copyfile

import tensorflow as tf
from tensorflow.contrib.learn.python.learn.datasets import base


import sys
sys.dont_write_bytecode=True

PACKAGE_PARENT = '../'
SCRIPT_DIR = os.path.dirname(os.path.realpath(os.path.join(os.getcwd(), os.path.expanduser('__file__'))))
sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT)))

from influence.inceptionModel import BinaryInceptionModel
from influence.logisticRegressionWithLBFGS import LogisticRegressionWithLBFGS
import influence.experiments
from influence.dataset import DataSet
# from influence.dataset_poisoning import iterative_attack, select_examples_to_attack, get_projection_to_box_around_orig_point, generate_inception_features
from influence.iter_attack import iterative_attack, select_examples_to_attack, get_projection_to_box_around_orig_point, generate_inception_features
from influence.Progress import *

from utils import dataset_metadatas, experiment_result_metadata_to_FN, FN_to_experiment_result_metadata, get_dataset, get_full_model_graph
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

from load_animals import *

from skimage import io

%load_ext autoreload
%autoreload 2

from data_poisoning import data_poisoning

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [74]:
# The goal of this function is to research on the top model transferbility.
# The top models we consider are Logistic Regression and SVM.

# Given which dataset to use, num_to_perterb, IF or FC,
# the function finds the results of data poisoning from the ./Experiment_results/Experiment_1/ folder
# and give the transferbility result: the logits of the true label of target test point 

dataset_classes = dataset_metadatas["Dog-Fish"] 
#num_train_ex_per_class, num_test_ex_per_class = 900, 300
use_IF = True
num_to_perterb = 2


In [51]:
# 1. Get the clean inception data_set 
data_sets = get_dataset(dataset_classes)

Loading animals from disk...
../data/dataset_dog-fish_train-900_test-300.npz


In [53]:
img_side = 299
num_channels = 3 
batch_size = 100
initial_learning_rate = 0.001 
keep_probs = None
decay_epochs = [1000, 10000]
weight_decay = 0.001
    
training_dataset_classes = dataset_classes["classes"]
num_classes = len(training_dataset_classes)
full_graph = tf.Graph()
with full_graph.as_default():
    full_model_name = '%s_inception_wd-%s' % ('_'.join(training_dataset_classes), weight_decay)
    full_model = BinaryInceptionModel(
        img_side=img_side,
        num_channels=num_channels,
        weight_decay=weight_decay,
        num_classes=num_classes, 
        batch_size=batch_size,
        data_sets=data_sets,
        initial_learning_rate=initial_learning_rate,
        keep_probs=keep_probs,
        decay_epochs=decay_epochs,
        mini_batch=True,
        train_dir='output',
        log_dir='log',
        model_name=full_model_name)

self.logits Tensor("Shape:0", shape=(2,), dtype=int32)


[genericNeuralNet.py:191 -   get_vec_to_list_fn() ] Total number of parameters: 2048


wrong_labels_bool Tensor("Shape_2:0", shape=(2,), dtype=int32)
logits Tensor("Shape_3:0", shape=(2,), dtype=int32)
inception_features:  Tensor("flatten/Reshape:0", shape=(?, ?), dtype=float32)
x_poison_features:  Tensor("Gather:0", shape=(1, ?), dtype=float32)
t_target_features:  Tensor("Gather_1:0", shape=(1, ?), dtype=float32)
Lp:  Tensor("norm/Squeeze:0", shape=(), dtype=float32)
LP_gradient Tensor("strided_slice_1:0", shape=(268203,), dtype=float32)


In [55]:
with full_graph.as_default():
    clean_inception_features = full_model.generate_inception_features(data_sets.train, None)

In [86]:
# 2. Find the results from the ./Experiment_results/Experiment_1/ folder
def parse_file_name(fn):
    fn = fn[:-4]
    lst = fn.split('_')
    test_idx = int(lst[-1])
    num_poisoned_training_points = int(lst[-3])
    Experiment_number = int(lst[1])
    contents_type = lst[2]
    method = lst[3]
    return {
        "test_idx":test_idx,
        "num_poisoned_training_points":num_poisoned_training_points,
        "Experiment_number":Experiment_number,
        "contents_type":contents_type,
        "method":method,
    }

source_dir = "Experiment_results/Experiment_1/"
file_names = !ls -tr Experiment_results/Experiment_1/
poisoned_images, poisoned_train_indices, target_test_indices = [], [], []
for i in range(0, len(file_names), 2):
    file_name = file_names[i]
    dataset_name = dataset_classes['name'].replace('-', '_')
    # only look at the results from the specified dataset
    if dataset_name in file_name:
        file = parse_file_name(file_name)
        if file["num_poisoned_training_points"] == num_to_perterb and file["Experiment_number"] == 1 and ((use_IF and file["method"] == 'IF') or (not use_IF and file["method"] == 'FC')):
            if file["contents_type"] == 'indices':
                poisoned_train_index = np.load(source_dir + file_name)
                poisoned_image = np.load(source_dir + file_names[i+1])
            else:
                poisoned_train_index = np.load(source_dir + file_names[i+1])
                poisoned_image = np.load(source_dir + file_name)
            target_test_index = file['test_idx']
            poisoned_images.append(poisoned_image)
            poisoned_train_indices.append(poisoned_train_index)
            target_test_indices.append(target_test_index)
        

In [90]:
# 3. Generate polluted inception features

polluted_inception_features = []
with full_graph.as_default():
    for poison_image, poisoned_train_index in zip(poisoned_images, poisoned_train_indices):
        poisoned_dataset = DataSet(poison_image, data_sets.train.labels[poisoned_train_index])
        polluted_inception_feature = full_model.generate_inception_features(poisoned_dataset, None)
        polluted_inception_features.append(polluted_inception_feature)

In [126]:
# 4. Initialize two top models firstly then compute the final results

C = 1.0 / (len(data_sets.train.x) * .001)  
if num_classes == 2:
    log_reg_model = LogisticRegression(
                C=C,
                tol=1e-8,
                fit_intercept=False, 
                solver='lbfgs',
                warm_start=True, #True
                max_iter=1000)
else:
    log_reg_model = LogisticRegression(
                C=C,
                tol=1e-8,
                fit_intercept=False, 
                solver='lbfgs',
                multi_class='multinomial',
                warm_start=True, #True
                max_iter=1000) 
    
svc_model = SVC(
                C=C,
                kernel='linear', 
                probability = True
                )

logits_log_reg, logits_svc_model = [], []
for polluted_inception_feature, poisoned_train_index, target_test_index in zip(polluted_inception_features, poisoned_train_indices, target_test_indices):
    
    X, Y = np.copy(clean_inception_features), np.copy(data_sets.train.labels)
    X[poisoned_train_index] = polluted_inception_feature
    target_test_dataset = DataSet(np.copy(data_sets.test.x[[target_test_index]]), np.copy(data_sets.test.labels[[target_test_index]]))
    with full_graph.as_default():
        target_test_inception_feature = full_model.generate_inception_features(target_test_dataset, None)
    
    log_reg_model.fit(X, Y)
    svc_model.fit(X, Y)
    
    logits_log_reg.append(log_reg_model.predict_proba(target_test_inception_feature)[0][int(data_sets.test.labels[target_test_index])])
    logits_svc_model.append(svc_model.predict_proba(target_test_inception_feature)[0][int(data_sets.test.labels[target_test_index])])

In [127]:
logits_svc_model

[0.0012622251352524406,
 0.7417485162474685,
 0.008847530885971025,
 5.446869043100385e-09,
 1.3958972658359296e-06,
 0.0036751563655723117,
 0.44310249340949986]

In [128]:
logits_log_reg

[0.026601219866971445,
 0.9733608682617317,
 0.028525896055540123,
 0.0023432224655087763,
 0.018794977291813564,
 0.012494134339237295,
 0.6688961455981225]