In [1]:
import os
import logging
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, losses, optimizers, metrics
from  tqdm import *

# Enable memory growth on every GPU that TensorFlow can see, so the many
# per-client models created below do not each try to grab the device up front.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

In [4]:
# Global parameters
# NUM_USER: number of simulated clients; Server.setup_clients builds exactly
# this many Client objects and the similarity matrix W is NUM_USER x NUM_USER.
NUM_USER = 100
# Seed NumPy's global RNG (used by Client.forward to shuffle sample indices).
np.random.seed(12)
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s')
import warnings
# NOTE: this silences *every* warning for the rest of the session.
warnings.filterwarnings("ignore")

In [5]:
# Tunable parameters
# Loss applied to the model output; Mnist_PL already ends with a softmax, so
# the loss receives probabilities rather than logits.
loss_fn = losses.SparseCategoricalCrossentropy()
# Metric objects handed to the Server; the same list is passed on to every client.
metrics_list = [tf.keras.metrics.Accuracy()]
# optimizer = optimizers.SGD(learning_rate=1e-2)
# Optimizer *class* (not an instance): each Client instantiates its own optimizer.
optimizer_class = optimizers.SGD
# Number of local mini-batches per communication round
# (Client.forward samples batch_size * E training examples each round).
E = 3
# Mini-batch size used for the local training steps.
batch_size = 20
# Width of the shared hidden Dense layer in Mnist_PL.
hidden_unit = 100
# beta = 0.99

In [8]:
import pickle

# Load the per-user train / test splits.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# point these paths at files you created yourself or otherwise trust.
with open('../data/mnist/train_array.pkl', 'rb') as f:
    train_data = pickle.load(f)
with open('../data/mnist/test_array.pkl', 'rb') as f:
    test_data = pickle.load(f)

# Build the client similarity matrix W and its degree matrix D.
# select_digit[i] holds the distinct labels present in user i's test split.
select_digit = [np.unique(test_data['user_data'][i]['y']) for i in range(NUM_USER)]
# W[i, j] = (number of labels shared by users i and j) / 4.
# NOTE(review): the divisor 4 presumably is the number of digit classes each
# user holds, which would make W[i, i] == 1 -- confirm against the data generator.
W = np.array([[len(np.intersect1d(select_digit[i], select_digit[j])) / 4.
               for j in range(NUM_USER)]
              for i in range(NUM_USER)])
# D is diagonal with the row sums of W (the degree of each client in the graph).
D = np.diag(np.sum(W, axis=1))

In [9]:
def normalization(x):
    """Rescale ``x`` so that all of its entries sum to one.

    :param x: array-like of numbers (any shape); the sum is taken over every entry.
    :return: ``x`` divided element-wise by the sum of all its entries.
        No guard is applied when that sum is zero.
    """
    total = np.sum(x)
    return x / total

class Mnist_PL(tf.Module):
    """Two-head MLP whose head logits are blended by a mixing vector ``c``.

    A shared ReLU hidden layer feeds two independent 10-way linear heads
    (``fc_20`` and ``fc_21``). The output is
    ``softmax(c[0] * head_0 + c[1] * head_1)``.

    ``c`` is a non-trainable ``(2, 1)`` variable initialised to a random point
    whose entries sum to one; it is updated explicitly by the client
    (``Client.update_c`` / ``Client.md_c``), not by the optimizer.
    """

    def __init__(self, hidden_dim):
        """
        :param hidden_dim: number of units in the shared hidden layer.
        """
        super(Mnist_PL, self).__init__()
        self.fc_1 = layers.Dense(hidden_dim, activation='relu')
        self.fc_20 = layers.Dense(10)
        self.fc_21 = layers.Dense(10)
        initial_c = normalization(tf.random.uniform([2, 1], maxval=1, dtype=tf.float32))
        self.c = tf.Variable(initial_c, trainable=False, dtype=tf.float32)

    def __call__(self, x):
        """Run a forward pass.

        :param x: batch of input features accepted by the first Dense layer.
        :return: tensor of class probabilities, one row of 10 per sample.

        Side effect: ``self.z`` is overwritten with a NumPy array holding the
        two heads' logits concatenated on the last axis
        (``[head_0 logits | head_1 logits]``, 20 columns). Because of the
        ``.numpy()`` call this method only works in eager mode.
        """
        hidden = self.fc_1(x)
        a_20 = self.fc_20(hidden)
        a_21 = self.fc_21(hidden)
        # Stored for the closed-form update of c.
        self.z = tf.concat([a_20, a_21], axis=-1).numpy()
        # Plain tensor ops: no need to build new Keras Add/Softmax layer
        # objects on every forward pass.
        mixed = tf.multiply(self.c[0], a_20) + tf.multiply(self.c[1], a_21)
        return tf.nn.softmax(mixed, axis=-1)

## 客户端模型

In [10]:
class Client():
    """One federated client: local data, a private ``Mnist_PL`` model and the
    local update rules for both the network weights and the mixing vector ``c``.

    Required keyword arguments (stored as attributes via ``**kwargs``):
    ``E`` (local mini-batches per round), ``optimizer`` (optimizer *class*),
    ``lr`` (initial learning rate), ``loss_fn`` and ``metrics``.
    ``batch_size`` must also be supplied before ``forward`` is called.
    """

    def __init__(self, id, hidden_unit, train_data=None,
                 test_data=None, **kwargs):
        """
        :param id: identifier of this client.
        :param hidden_unit: hidden width passed to ``Mnist_PL``.
        :param train_data: dict with keys ``'x'`` and ``'y'``; defaults to empty lists.
        :param test_data: dict with keys ``'x'`` and ``'y'``; defaults to empty lists.
        :param kwargs: extra attributes, see the class docstring.
        """
        # None sentinels instead of mutable dict defaults shared between calls.
        if train_data is None:
            train_data = {'x': [], 'y': []}
        if test_data is None:
            test_data = {'x': [], 'y': []}
        self.id = id
        self.train_data = {'x': np.array(train_data['x']),
                           'y': np.array(train_data['y'])}
        self.test_data = {'x': np.array(test_data['x']),
                          'y': np.array(test_data['y'])}
        self.train_samples = len(train_data['y'])
        self.test_samples = len(test_data['y'])
        self.model = Mnist_PL(hidden_unit)
        self.index = np.arange(self.train_samples)
        for key, value in kwargs.items():
            setattr(self, key, value)

        for required in ('E', 'optimizer', 'lr', 'loss_fn', 'metrics'):
            assert hasattr(self, required), 'missing required keyword argument: ' + required
        self.init_model()
        # `optimizer` arrives as a class; replace the attribute with an instance.
        self.optimizer = kwargs['optimizer'](self.lr)
        self.kwargs = kwargs

    def init_model(self):
        '''
        Call the model once on a few test samples so that the Dense layers
        create their variables.
        '''
        init_feature = tf.cast(self.test_data['x'][:5], dtype=tf.float32)
        _ = self.model(init_feature)

    def forward(self, communication_round=None):
        """Run one round of local training on a fresh random subset.

        :param communication_round: index of the current round. Rounds up to
            and including 40 use a step size of 1e-1, later rounds 5e-2 (the
            optimizer is rebuilt accordingly). When ``None`` the optimizer
            currently held by the client is kept unchanged.
        :return: tuple ``(trainable_variables, c)`` where the first element is
            the model's live variable collection and ``c`` is a NumPy copy of
            the mixing vector.
        """
        if communication_round is not None:
            step_size = 1e-1 if communication_round <= 40 else 5e-2
            self.optimizer = self.kwargs['optimizer'](step_size)
        np.random.shuffle(self.index)
        # Remembered so update_c / md_c reuse the very same samples.
        self.select_index = self.index[:self.batch_size * self.E]

        train_set = tf.data.Dataset.from_tensor_slices((self.train_data["x"][self.select_index],
                                                        self.train_data["y"][self.select_index])).batch(self.batch_size)
        for feature, label in train_set:
            with tf.GradientTape() as tape:
                predict = self.model(feature)  # class probabilities (softmax already applied)
                loss = self.loss_fn(label, predict)
            grads = tape.gradient(loss, self.model.trainable_variables)
            self.optimizer.apply_gradients(zip(grads, self.model.trainable_variables))
        return (self.model.trainable_variables, self.model.c.numpy())

    def update_variables(self, new_variables):
        """
        Overwrite the local trainable variables with the given values.

        :param new_variables: sequence with one array per trainable variable,
            in the same order as ``self.model.trainable_variables``.
        :return: None
        """
        assert len(self.model.trainable_variables) == len(new_variables)
        for i in range(len(new_variables)):
            self.model.trainable_variables[i].assign(new_variables[i])

    def update_c(self, new_variables, D_row, W_row):
        """Multiplicative update of the mixing vector ``c``.

        Must be called after ``forward`` (it reuses ``self.select_index``).

        :param new_variables: aggregated weights to load before the update.
        :param D_row: 1-D NumPy array, ``lamb * D[i, i] * C[i, :]`` as computed
            by the server for this client.
        :param W_row: 1-D NumPy array, ``lamb * W.dot(C)[i, :]`` as computed by
            the server for this client.
        :return: None; ``self.model.c`` is overwritten with the new, re-normalised value.
        """
        self.update_variables(new_variables)
        c_old = self.model.c.numpy()  # shape (num_heads, 1)
        num_heads = c_old.shape[0]
        batch_x = np.asarray(self.train_data['x'][self.select_index], dtype=np.float32)
        batch_y = np.asarray(self.train_data['y'][self.select_index]).astype(np.int64)

        # The forward pass also refreshes self.model.z for this batch.
        predict = np.asarray(self.model(batch_x).numpy())  # (N, 10)
        # One-hot labels; out-of-range labels give an all-zero row.
        onehot_y = (batch_y[:, None] == np.arange(10)).astype(np.float32)  # (N, 10)

        # z is [head_0 logits | head_1 logits] along the last axis, so head k
        # occupies columns 10k .. 10k+9: reshape straight to (N, num_heads, 10).
        # (Reshaping to (N, 10, num_heads) and transposing would interleave the heads.)
        Z = self.model.z.reshape(-1, num_heads, 10)
        zy = np.mean(np.matmul(Z, onehot_y[:, :, None]), axis=0)  # (num_heads, 1)
        zp = np.mean(np.matmul(Z, predict[:, :, None]), axis=0)   # (num_heads, 1)

        # Terms shared by every component of c (hoisted out of the per-head loop).
        numer_shared = zp.T.dot(c_old) + D_row.T.dot(c_old.ravel())  # (1, 1)
        denom_shared = zy.T.dot(c_old) + W_row.T.dot(c_old.ravel())  # (1, 1)
        numer_own = (zy[:, 0] + W_row)[:, None]  # (num_heads, 1)
        denom_own = (zp[:, 0] + D_row)[:, None]  # (num_heads, 1)

        c_new = (c_old * (numer_own + numer_shared) / (denom_own + denom_shared)).astype(c_old.dtype)

        c_new[c_new < 0] = 1e-6
        c_new = normalization(c_new)
        assert np.ndim(c_new) == 2
        self.model.c.assign(c_new)

    def md_c(self, new_variables, D_row, W_row, lr=1e-1):
        """Exponentiated-gradient (mirror-descent style) update of ``c``.

        Must be called after ``forward`` (it reuses ``self.select_index``).

        :param new_variables: aggregated weights to load before the update.
        :param D_row: ``lamb * D[i, i] * C[i, :]`` for this client.
        :param W_row: ``lamb * W.dot(C)[i, :]`` for this client.
        :param lr: step size of the multiplicative update.
        :return: None; ``self.model.c`` is overwritten and sums to one afterwards.
        """
        self.update_variables(new_variables)
        batch_x = self.train_data['x'][self.select_index]
        batch_y = self.train_data['y'][self.select_index]
        batch_x = tf.cast(batch_x, dtype=tf.float32)
        with tf.GradientTape() as tape:
            # c is non-trainable, so it has to be watched explicitly.
            tape.watch(self.model.c)
            prediction = self.model(batch_x)
            # Use the client's own loss rather than a notebook-level global.
            loss = self.loss_fn(batch_y, prediction)
        grads_c = tape.gradient(loss, self.model.c)
        assert grads_c is not None
        # Add the gradient of the graph regulariser supplied by the server.
        grads_c += tf.reshape(tf.cast(D_row - W_row, dtype=tf.float32), self.model.c.shape)
        unnormalised_c = self.model.c * tf.exp(-lr * grads_c)
        self.model.c.assign(unnormalised_c / tf.reduce_sum(unnormalised_c))

    def test(self):
        """Evaluate the local model on the full local test and train sets.

        :return: tuple ``(test_loss, metric_results, train_loss, group_idx)``
            where ``metric_results`` has one value per entry of
            ``self.metrics`` and ``group_idx`` is the index of the largest
            component of ``c``.
        """
        # Each dataset is served as a single batch containing every sample.
        test_set = tf.data.Dataset.from_tensor_slices((self.test_data["x"], self.test_data["y"])).batch(self.test_samples)
        train_set = tf.data.Dataset.from_tensor_slices((self.train_data["x"], self.train_data["y"])).batch(self.train_samples)
        for feature, label in test_set:
            output = self.model(feature)
            loss = self.loss_fn(label, output).numpy()

        prediction = tf.argmax(output, axis=-1).numpy()

        metric_result = []
        for metric in self.metrics:
            # The metric objects may be shared between clients: reset first.
            metric.reset_states()
            metric.update_state(self.test_data['y'], prediction)
            metric_result.append(metric.result().numpy())

        for feature, label in train_set:
            output = self.model(feature)
            train_loss = self.loss_fn(label, output).numpy()
        group_idx = tf.argmax(self.model.c, axis=0).numpy()[0]
        return (loss, metric_result, train_loss, group_idx)

## 服务端模型

In [11]:
class Server():
    """Federated-learning coordinator.

    Owns the global model parameters, creates the clients, averages their
    weights every round, triggers the per-client update of the mixing vector
    ``c`` and logs the averaged evaluation results.
    """

    def __init__(self, hidden_unit, train_data, test_data, E, optimizer, loss_fn, metrics, lamb,
                    batch_size, epoches, D, W,
                    lr, #
                    filename='/home/dihao/code/PFedL/preliminary/Result/mnist_pfedl.txt'):
        """
        :param hidden_unit: hidden width of every ``Mnist_PL`` model.
        :param train_data: per-user training data, indexable by ``0 .. NUM_USER-1``;
            each entry is a dict with keys ``'x'`` and ``'y'``.
        :param test_data: per-user test data with the same layout.
        :param E: local mini-batches per round, forwarded to the clients.
        :param optimizer: optimizer *class*, forwarded to the clients.
        :param loss_fn: loss callable, forwarded to the clients.
        :param metrics: list of metric objects, forwarded to the clients.
        :param lamb: weight of the graph regulariser used when updating ``c``.
        :param batch_size: local mini-batch size, forwarded to the clients.
        :param epoches: number of communication rounds run by ``train``.
        :param D: degree matrix; only its diagonal is read.
        :param W: client similarity matrix.
        :param lr: initial client learning rate.
        :param filename: text file the per-round results are appended to; the
            periodic ``c_<round>.txt`` dumps go to the same directory.
            NOTE(review): the default is a machine-specific absolute path.
        """
        self.hidden_unit = hidden_unit
        self.train_data = train_data
        self.test_data = test_data
        self.E = E
        self.optimizer = optimizer
        self.loss_fn = loss_fn
        self.metrics = metrics
        self.lamb = lamb
        self.batch_size = batch_size
        self.epoches = epoches
        self.model = Mnist_PL(hidden_unit)

        self.init_model()
        self.lastest_model = self.get_parameters()
        self.clients = self.setup_clients(lr)
        # Matrix of client mixing vectors, one row per client; set by aggragate().
        self.C = None
        self.file = filename
        self.D = D
        self.W = W

    def init_model(self):
        '''
        Call the model once on a few samples of user 0 so that the Dense
        layers create their variables.
        '''
        init_feature = tf.cast(self.test_data[0]['x'][:5], dtype=tf.float32)
        _ = self.model(init_feature)

    def get_parameters(self):
        """Return NumPy copies of the server model's trainable variables."""
        return [variable.numpy() for variable in self.model.trainable_variables]

    def setup_clients(self, lr):
        """Create one ``Client`` per user id in ``range(NUM_USER)``.

        :param lr: initial learning rate given to every client.
        :return: list of clients ordered by user id.
        """
        return [
            Client(i, self.hidden_unit, train_data=self.train_data[i], test_data=self.test_data[i],
                   E=self.E, optimizer=self.optimizer, loss_fn=self.loss_fn, metrics=self.metrics,
                   batch_size=self.batch_size, lr=lr)
            for i in range(NUM_USER)
        ]

    def broadcast(self):
        """Copy the latest global parameters into every client model."""
        for client in self.clients:
            client.update_variables(self.lastest_model)

    def train(self):
        """Run ``self.epoches`` communication rounds.

        Each round: local training on every client, weight averaging, update
        of every client's ``c`` (via ``md_c``), evaluation, and logging to
        both ``self.file`` and the ``logging`` module. Every 20th round the
        aggregated ``C`` matrix is saved next to ``self.file``.
        """
        dir_name = os.path.dirname(self.file)
        if dir_name:
            # Fail early (or create the folder) instead of after a full round.
            os.makedirs(dir_name, exist_ok=True)
        self.broadcast()  # broadcast to all clients
        for epoch in trange(self.epoches):
            client_solution = [client.forward(epoch) for client in self.clients]
            self.lastest_model, self.C = self.aggragate(client_solution)
            # md_c also loads the new global weights into every client.
            self.md_c(self.lastest_model)

            # NOTE(review): self.C holds the mixing vectors collected *before*
            # md_c ran this round -- confirm that this is the snapshot wanted.
            if self.C is not None and (epoch+1) % 20 == 0:
                c_path = os.path.join(dir_name, "c_"+str(epoch)+".txt")
                np.savetxt(c_path, self.C)
            round_loss, round_metrics, train_loss, c_acc = self.test()
            message = 'At round {}, test loss is: {:.4f}, metrics result is {}, train loss is {}, c accuracy is {}'.format(epoch, round_loss, round_metrics, train_loss, c_acc)
            with open(self.file, 'a+') as f:
                f.write(message)
                f.write('\n')
            logging.info(message)

    def aggragate(self, client_solution):
        """Average the client weights and stack the client mixing vectors.

        :param client_solution: list of ``(variables, c)`` tuples as returned
            by ``Client.forward``; every variable must expose ``.numpy()``.
        :return: tuple ``(V, C)`` where ``V`` is the list of averaged weight
            arrays and ``C`` has one row per client.
        """
        num_clients = len(client_solution)
        summed = [np.zeros_like(x) for x in self.lastest_model]
        mixing_vectors = []
        for client_variables, client_c in client_solution:
            summed = [acc + var.numpy() for acc, var in zip(summed, client_variables)]
            mixing_vectors.append(client_c)
        C = np.concatenate(mixing_vectors).reshape(num_clients, -1)
        V = [total / num_clients for total in summed]
        return (V, C)

    def _regulariser_rows(self):
        """Yield ``(client, d_row, w_row)`` for every client, computing W.dot(C) once."""
        WC = self.W.dot(self.C)
        for idx, client in enumerate(self.clients):
            w_row = self.lamb * WC[idx, :]
            d_row = self.lamb * self.D[idx, idx] * self.C[idx, :]
            yield client, d_row, w_row

    def update_c(self, lastest_model):
        """Run the multiplicative ``c`` update (``Client.update_c``) on every client."""
        for client, d_row, w_row in self._regulariser_rows():
            client.update_c(lastest_model, D_row=d_row, W_row=w_row)

    def md_c(self, lastest_model):
        """Run the exponentiated-gradient ``c`` update (``Client.md_c``) on every client."""
        for client, d_row, w_row in self._regulariser_rows():
            client.md_c(lastest_model, D_row=d_row, W_row=w_row)

    def test(self):
        """Evaluate every client and average the results.

        :return: tuple ``(mean test loss, [mean of each metric], mean train
            loss, c accuracy)``. "c accuracy" is the fraction of clients whose
            dominant ``c`` component matches the reference assignment
            "first half of the clients -> 0, second half -> 1".
            NOTE(review): group labels are only defined up to a swap, so a
            value of 0.0 may also indicate a perfect but relabelled split.
        """
        loss = []
        train_loss = []
        concate_idx = []
        # One independent bucket per metric ([[] * n] would yield a single list).
        metrics_list = [[] for _ in self.metrics]
        for client in self.clients:
            client_loss, client_metrics, client_trainloss, group_idx = client.test()
            loss.append(client_loss)
            train_loss.append(client_trainloss)
            concate_idx.append(group_idx)
            for bucket, value in zip(metrics_list, client_metrics):
                bucket.append(value)

        num_clients = len(self.clients)
        half = num_clients // 2
        expected_group = np.array([0] * half + [1] * (num_clients - half))
        c_acc = np.sum(expected_group == np.asarray(concate_idx)) / num_clients
        return np.mean(loss), [np.mean(values) for values in metrics_list], np.mean(train_loss), c_acc


In [12]:
# Re-seed both RNGs right before the experiment is built: NumPy drives the
# per-round sample shuffling in Client.forward, TensorFlow drives the random
# initialisation of each model's mixing vector c.
np.random.seed(12)
tf.random.set_seed(123)

In [13]:
# Build the server together with its NUM_USER clients.
# E and batch_size come from the tunable-parameter cell instead of being
# repeated as literals here.
# NOTE: `lr` only seeds each client's first optimizer; Client.forward rebuilds
# the optimizer every round with 1e-1 (rounds <= 40) or 5e-2 (later rounds).
server = Server(
    hidden_unit=hidden_unit,
    train_data=train_data['user_data'],
    test_data=test_data['user_data'],
    E=E,
    optimizer=optimizer_class,
    loss_fn=loss_fn,
    metrics=metrics_list,
    lamb=5e-3,
    batch_size=batch_size,
    epoches=200,
    D=D,
    W=W,
    lr=1e-2,
)

In [14]:
# Run all communication rounds; each round's averaged test loss, metrics,
# train loss and "c accuracy" are logged and appended to the server's result file.
server.train()

0:59:39,381 - <ipython-input-11-cd3180c10b9a>[line:73] - INFO: At round 118, test loss is: 0.1993, metrics result is [0.9438212], train loss is 0.21253438293933868, c accuracy is 0.0
 60%|█████▉    | 119/200 [19:44<14:31, 10.76s/it]2021-02-21 00:59:50,066 - <ipython-input-11-cd3180c10b9a>[line:73] - INFO: At round 119, test loss is: 0.1990, metrics result is [0.9445468], train loss is 0.21208690106868744, c accuracy is 0.0
 60%|██████    | 120/200 [19:55<14:19, 10.74s/it]2021-02-21 01:00:00,144 - <ipython-input-11-cd3180c10b9a>[line:73] - INFO: At round 120, test loss is: 0.1986, metrics result is [0.9441336], train loss is 0.21154659986495972, c accuracy is 0.0
 60%|██████    | 121/200 [20:05<13:52, 10.54s/it]2021-02-21 01:00:09,448 - <ipython-input-11-cd3180c10b9a>[line:73] - INFO: At round 121, test loss is: 0.1980, metrics result is [0.9438303], train loss is 0.21102474629878998, c accuracy is 0.0
 61%|██████    | 122/200 [20:14<13:13, 10.17s/it]2021-02-21 01:00:18,825 - <ipython-i