In [1]:
import numpy as np
import math
import itertools
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.models import clone_model
from datasets import get_data, get_training_data
from models import get_model, resnet_v1, resnet_v2
from util import select_clean_uncertain, combine_result, inject_noise
import time
import argparse
from tensorflow.python.lib.io import file_io
from keras.utils import np_utils, multi_gpu_model
from keras import backend as K
from io import BytesIO
from loss_acc_plot import loss_acc_plot
from keras.datasets import mnist, cifar10, cifar100, fashion_mnist

In [37]:
NUM_CLASSES = {'mnist': 10, 'svhn': 10, 'cifar-10': 10, 'cifar-100': 100, 'celeb': 20}
dataset = "cifar-10"
init_noise_ratio = 0
data_ratio = 100.0
X_train, y_train, X_test, y_test, un_selected_index = get_data(dataset, init_noise_ratio, data_ratio, random_shuffle=False)

X_train: (50000, 32, 32, 3)
y_train: (50000, 10)
X_test: (10000, 32, 32, 3)
y_test (10000, 10)


In [6]:
n_client = 3
client_data_number_interval = 2000
clients = []
clients_train_data = []
clients_train_label = []

In [7]:
cursor = 0
image_shape = X_train.shape[1:]
server =  get_model(dataset, input_tensor=None, input_shape=image_shape, num_classes=NUM_CLASSES[dataset])
# initialize n_client models, and the correspondante training data for each client
# each client has different data size
for i in range(n_client):
    clients.append(get_model(dataset, input_tensor=None, input_shape=image_shape, num_classes=NUM_CLASSES[dataset]))
    clients_train_data.append(X_train[cursor: cursor+client_data_number_interval*(i+1)])
    clients_train_label.append(y_train[cursor: cursor+client_data_number_interval*(i+1)])
    cursor = cursor + client_data_number_interval*(i)

In [8]:
optimizer = SGD(lr=0.01, decay=1e-4, momentum=0.9)
server.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])
for i in range(n_client):
    clients[i].compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

In [9]:
datagen = ImageDataGenerator(
        featurewise_center = False,  # set input mean to 0 over the dataset
        samplewise_center = False,  # set each sample mean to 0
        featurewise_std_normalization = False,  # divide inputs by std of the dataset
        samplewise_std_normalization = False,  # divide each input by its std
        zca_whitening = False,  # apply ZCA whitening
        rotation_range = 0,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range = 0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range = 0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip = True,  # randomly flip images
        vertical_flip = False,  # randomly flip images
        )
datagen.fit(X_train)

In [10]:
batch_size = 64
results = []
# 2 is only for quick test, need to set a large number
epochs = 2
for i in range(n_client):
    results.append(
        clients[i].fit_generator(datagen.flow(clients_train_data[i], clients_train_label[i], batch_size=batch_size),
                                steps_per_epoch=clients_train_data[i].shape[0]//batch_size, epochs=epochs,
                                validation_data=(X_test, y_test)
                                )
    )

Instructions for updating:
Please use Model.fit, which supports generators.
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2


In [43]:
def aggregate_weights(client_list):
    n_client = len(client_list)
    if n_client == 0:
        print('empty input')
        return
    n_layers = len(client_list[0].get_weights())
    # copy the weights and structure from last client
    result = client_list[n_client-1].get_weights().copy()
    for k in range(n_client):
        result[k] = result[k]*(1.0/n_client)    
    for i in range(n_layers):
        # Using n_client -1: because result contains already last client's weights
        for j in range(n_client-1):
            result[i] = result[i] + client_list[j].get_weights()[i]*(1.0/n_client)            
    return result

In [12]:
weights = aggregate_weights(clients)

In [13]:
server.set_weights(weights)

In [14]:
print("Evaluate on test data with all model aggregation")
results = server.evaluate(X_test, y_test, batch_size=batch_size)
print("test loss, test acc:", results)
# accuracy_aggregate is the baseline to calculate influence
# influence is the difference between accurate_aggregatea and accuracy without one of client
accuracy_aggregate = results[1]

Evaluate on test data with all model aggregation
test loss, test acc: [9.85114574432373, 0.1031000018119812]


In [15]:
print("Conducting Shapley Value")

Conducting Shapley Value


In [50]:
def make_all_subsets(n_client):
    client_list = list(np.arange(n_client))
    set_of_all_subsets = set([])
    for i in range(len(client_list),-1,-1):
        for element in itertools.combinations(client_list,i):
            set_of_all_subsets.add(frozenset(element))
    return sorted(set_of_all_subsets)

In [53]:
l1 = make_all_subsets(3)

In [58]:
l1[0] 

frozenset()

In [59]:
l1[0] == frozenset()

True

In [60]:
l1[0].difference(set([0]))

frozenset()

In [65]:
def make_all_subsets(n_client):
    client_list = list(np.arange(n_client))
    set_of_all_subsets = set([])
    for i in range(len(client_list),-1,-1):
        for element in itertools.combinations(client_list,i):
            set_of_all_subsets.add(frozenset(element))
    return sorted(set_of_all_subsets)

def calculate_shapley_values(n_client):
        print("*************")
        client_list = list(np.arange(n_client))
        shapley = []
        clientShapley = 0
        total = 0
        factorialTotal = math.factorial(n_client)
        set_of_all_subsets = make_all_subsets(n_client)
        for client in client_list:
            for subset in set_of_all_subsets:
                if client in subset:
                    remainderSet = subset.difference(set([client]))   
                    remainder_res = [0,0]
                    sub_weights = aggregate_weights(np.array(clients)[list(subset)])
                    server.set_weights(sub_weights)
                    sub_res = server.evaluate(X_test, y_test, batch_size=batch_size)
                    b = len(remainderSet)
                    factValue = (len(client_list) - b -1)                  
                    if remainderSet != frozenset():                                            
                        remainder_weights = aggregate_weights(np.array(clients)[list(remainderSet)])
                        server.set_weights(remainder_weights)
                        remainder_res = server.evaluate(X_test, y_test, batch_size=batch_size)
                    difference = sub_res[1] - remainder_res[1]
                    divisor = (math.factorial(factValue) * math.factorial(b) * 1.0) / (factorialTotal * 1.0)
                    weightValue = divisor * difference
                    clientShapley += weightValue
            shapley.append(clientShapley)
            print("Shapley Value of Client " + str(client) + ": " + str(clientShapley))
            total = total + clientShapley
            clientShapley = 0
        print("Shapley Value in list: "+ str(shapley))
        print("Total: " + str(total) + " *** Note: This should equal to 1.0")
        print("*************")

In [66]:
calculate_shapley_values(n_client)

*************
Shapley Value of Client 0: 0.04676666359106699
Shapley Value of Client 1: 0.02686666945616404
Shapley Value of Client 2: 0.029466668764750164
Shapley Value in list: [0.04676666359106699, 0.02686666945616404, 0.029466668764750164]
Total: 0.10310000181198119 *** Note: This should equal to 1.0
*************


In [4]:
y_train.shape

(50000, 10)

In [5]:
def other_class(n_classes, current_class):
    """
    Returns a list of class indices excluding the class indexed by class_ind
    :param nb_classes: number of classes in the task
    :param class_ind: the class index to be omitted
    :return: one random class that != class_ind
    """
    #print(current_class)
    if current_class < 0 or current_class >= n_classes:
        error_str = "class_ind must be within the range (0, nb_classes - 1)"
        raise ValueError(error_str)

    other_class_list = list(range(n_classes))
    other_class_list.remove(current_class)
    other_class = np.random.choice(other_class_list)
    return other_class

In [43]:
np.argmax(y_train, axis = 1)

array([6, 9, 9, ..., 9, 1, 1])

In [42]:
y_train[0]

array([0., 0., 0., 0., 0., 0., 1., 0., 0., 0.], dtype=float32)

In [None]:
to_categorical()

In [None]:
def set_to_one(y_train):
    y = y_train.copy()
    n_class = y.shape[1]  
    for i in range(y.shape[0]):
        y[i] = np_utils.to_categorical(0, n_class)
    return y

In [None]:
def set_to_one(y_train):
    y = y_train.copy()
    n_class = y.shape[1]  
    for i in range(y.shape[0]):
        y[i] = np_utils.to_categorical(1, n_class)
    return y

In [40]:
def flip_label(y_train):
    y = y_train.copy()
    n_class = y.shape[1]  
    for i in range(y.shape[0]):
        y[i] = np_utils.to_categorical(other_class(n_class, y[i].argmax(axis=-1)), n_class)
    return y

In [54]:
result = np.argmax(flip_label(y_train), axis = 1)
print(result)

[7 0 2 ... 2 5 4]


In [28]:
np.argmax(y_train, axis = 1)[-3]

9

In [26]:
result[-3]

9

In [21]:
np.argmax(flip_label(y_train), axis = 1)

array([3, 7, 5, ..., 9, 8, 1])

In [31]:
y_train[0].argmax(axis=-1)

2

In [35]:
list(set([1,2]))

AttributeError: 'list' object has no attribute 'toarray'

In [36]:
np.array(clients)[list(set([1,2]))]

array([<tensorflow.python.keras.engine.training.Model object at 0x14bca6f60>,
       <tensorflow.python.keras.engine.training.Model object at 0x14c44e9e8>],
      dtype=object)

In [48]:
calculate_shapley_values(n_client)

*************


TypeError: 'numpy.int64' object is not iterable

In [26]:
frozenset({0}).difference(set(0))

TypeError: 'int' object is not iterable

In [28]:
set([0])

{0}

In [49]:
make_all_subsets(3)

[frozenset(),
 frozenset({2}),
 frozenset({1}),
 frozenset({1, 2}),
 frozenset({0}),
 frozenset({0, 1}),
 frozenset({0, 2}),
 frozenset({0, 1, 2})]