In [2]:
#带加密
import math
import time
import numpy as np
from phe import paillier
import pandas as pd
from sklearn import datasets
from sklearn.datasets import load_diabetes
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.metrics  import accuracy_score, f1_score

In [7]:
# Sanity check of Paillier's additive homomorphism:
# ciphertext + ciphertext and plaintext-scalar * ciphertext both decrypt
# to the result of the same arithmetic on the plaintexts.
pk, sk = paillier.generate_paillier_keypair()
p1, p2 = pk.encrypt(10), pk.encrypt(20)

P1 = p1 + p2                    # Enc(10) + Enc(20); overwritten two lines below
P2 = 20 * p1                    # 20 * Enc(10)
P1 = p1 + p2 + (2 * 10) * p2    # Enc(10) + Enc(20) + 20 * Enc(20)

print(f"P1:{sk.decrypt(P1)}")
print(f"P2:{sk.decrypt(P2)}")



P1:430
P2:200


In [3]:
class Client:
    """
    Base class for every party of the protocol.

    Each client keeps its configuration, a mailbox dict (`data`) that peers
    write into, and a registry of the peers it is connected to.
    """

    def __init__(self, config):
        """Store the shared config and start with an empty mailbox and peer registry."""
        ## Model / training parameters
        self.config = config
        ## Intermediate results (own values and values received from peers);
        ## peer registry: name -> client object
        self.data, self.other_client = {}, {}

    def connect(self, client_name, target_client):
        """Register `target_client` as a peer reachable under `client_name`."""
        self.other_client.update({client_name: target_client})

    def send_data(self, data, target_client):
        """Deliver `data` by merging it into the mailbox of `target_client`."""
        mailbox = target_client.data
        mailbox.update(data)

In [None]:
#辅助节点
class ClientC(Client):
    """
    Client C as trusted dealer.

    C generates the Paillier key pair, distributes the public key to A and B,
    decrypts the encrypted loss and gradients they send back, and returns each
    plaintext gradient to the party that owns the corresponding weights.
    """
    def __init__(self, A_d_shape, B_d_shape, config):
        """
        :param A_d_shape: shape of party A's training data
        :param B_d_shape: shape of party B's training data
        :param config: the config dict
        """
        super().__init__(config)
        self.A_data_shape = A_d_shape
        self.B_data_shape = B_d_shape
        self.public_key = None
        self.private_key = None
        self.epoch = 0
        ## Decrypted (Taylor-approximated) loss, one entry per finished epoch
        self.loss = []

    def update_weight(self, dJ_a):
        """
        Not applicable to C.

        C holds neither features nor weights, so there is nothing to update.
        The previous body was copied from ClientA and read `self.weights` /
        `self.X`, which C never defines; the same exception type is raised
        here, but with an explicit message.

        :raises AttributeError: always
        """
        raise AttributeError("ClientC holds no weights or features; update_weight is not applicable.")

    def _decrypt_vector(self, encrypted_values):
        """
        Decrypt an array of ciphertexts element by element.

        `PaillierPrivateKey.decrypt` accepts a single EncryptedNumber, so an
        array of ciphertexts cannot be passed to it directly.

        :param encrypted_values: iterable of EncryptedNumber
        :return: 1-D float ndarray of the decrypted values
        """
        return np.asarray(
            [self.private_key.decrypt(value) for value in np.ravel(encrypted_values)],
            dtype=float,
        )

    def task_1(self, client_A_name, client_B_name):
        """
        Generate the Paillier key pair and send the public key to A and B.

        A key-generation failure is no longer swallowed: previously the error
        was only printed and the method then crashed on an undefined name.

        :param client_A_name: name under which A was registered via connect()
        :param client_B_name: name under which B was registered via connect()
        """
        public_key, private_key = paillier.generate_paillier_keypair()
        self.public_key = public_key
        self.private_key = private_key

        data_to_AB = {"public_key": public_key}
        self.send_data(data_to_AB, self.other_client[client_A_name])
        self.send_data(data_to_AB, self.other_client[client_B_name])
        return

    def task_2(self,client_A_name,client_B_name):
        """
        Step 4: decrypt the encrypted loss and gradients sent by A and B and
        return each plaintext gradient to its owner.

        :param client_A_name: name under which A was registered via connect()
        :param client_B_name: name under which B was registered via connect()
        """
        dt = self.data
        assert self.private_key is not None, "Error: key pair not generated; run task_1 first."
        assert "encrypted_loss" in dt.keys(), "Error: 'encrypted_loss' from A in step 2 not successfully received."
        assert "encrypted_gradient_B" in dt.keys(), "Error: 'encrypted_gradient_B' from B in step 3 not successfully received."
        assert "encrypted_gradient_A" in dt.keys(), "Error: 'encrypted_gradient_A' from A in step 2 not successfully received."

        # The loss is a single ciphertext; the gradients are arrays of ciphertexts.
        loss = self.private_key.decrypt(dt["encrypted_loss"])
        gradient_B = self._decrypt_vector(dt["encrypted_gradient_B"])
        gradient_A = self._decrypt_vector(dt["encrypted_gradient_A"])
        self.epoch += 1
        self.loss.append(loss)

        print("epoch{} loss: {}".format(self.epoch,loss))

        data_to_A = {"gradient_A":gradient_A}
        data_to_B = {"gradient_B":gradient_B}

        # send_data needs the peer object, not its name.
        self.send_data(data_to_A, self.other_client[client_A_name])
        self.send_data(data_to_B, self.other_client[client_B_name])

In [None]:
#主动方
class ClientA(Client):
    """
    Active party A: owns a slice of the features and the labels.

    A combines its own linear predictor with B's encrypted one, computes the
    encrypted loss, the encrypted residual and its own encrypted gradient,
    and later applies the plaintext gradient returned by C.
    """
    def __init__(self, X, y, config):
        """
        :param X: A's training features, 2-D array (samples x features)
        :param y: training labels, one per sample
        :param config: the config dict; 'lr' and 'lambda' are read here
        """
        super().__init__(config)
        self.X = X
        self.y = y
        self.weights = np.zeros(X.shape[1])

    def compute_z_a(self):
        """Return A's share of the linear predictor, X_A . w_A."""
        z_a = np.dot(self.X, self.weights)
        return z_a

    ## Encrypted gradient of A
    def compute_encrypted_dJ_a(self, encrypted_u):
        """
        Return A's encrypted gradient X_A^T . [d] + lambda * w_A.

        :param encrypted_u: array of encrypted residuals, one per sample
        """
        encrypted_dJ_a = self.X.T.dot(encrypted_u) + self.config['lambda'] * self.weights
        return encrypted_dJ_a

    ## Parameter update
    def update_weight(self, dJ_a):
        """
        Gradient-descent step: w_A <- w_A - lr * dJ_a / n_samples.

        :param dJ_a: plaintext gradient for A's weights
        """
        self.weights = self.weights - self.config["lr"] * dJ_a / len(self.X)
        return

    #step2
    def task_1(self,client_B_name,client_C_name):
        """
        Step 2: compute the encrypted loss, the encrypted residual [d] and
        A's encrypted gradient; send loss and gradient to C and [d] to B.

        Missing inputs are no longer swallowed: previously a failed check was
        only printed and the method then crashed on an undefined name.

        :param client_B_name: name under which B was registered via connect()
        :param client_C_name: name under which C was registered via connect()
        """
        dt = self.data
        assert "public_key" in dt.keys(), "Error: 'public_key' from C in step 1 not successfully received."
        assert "encrypted_z_b" in dt.keys(), "Error: 'encrypted_z_b' from B in step 1 not successfully received."
        assert "encrypted_z_b_square" in dt.keys(), "Error: 'encrypted_z_b_square' from B in step 1 not successfully received."
        pk = dt['public_key']

        z_a = self.compute_z_a()
        z_a_square = z_a ** 2
        # phe encrypts one scalar at a time; float() avoids NumPy scalar types
        # that phe's encoder does not recognise.
        encrypted_z_a = np.asarray([pk.encrypt(float(x)) for x in z_a])
        encrypted_z_a_square = np.asarray([pk.encrypt(float(x)) for x in z_a_square])
        dt.update({"z_a": z_a})

        encrypted_z_b = dt["encrypted_z_b"]
        encrypted_z_b_square = dt["encrypted_z_b_square"]

        # [z] = [z_a] + [z_b];  [z^2] = [z_a^2] + [z_b^2] + 2 * z_a * [z_b]
        encrypted_z = encrypted_z_a + encrypted_z_b
        encrypted_z_square = encrypted_z_a_square + encrypted_z_b_square + 2 * z_a * encrypted_z_b

        # Labels stay in plaintext on A's side. Adding or multiplying a
        # plaintext onto a ciphertext still yields a ciphertext, so y never
        # has to be encrypted (and phe cannot encrypt a whole array anyway).
        # NOTE(review): the loss and residual below are the second-order
        # Taylor forms that assume y in {-1, +1}; confirm the label encoding
        # used by the caller.
        y = np.asarray(self.y, dtype=float)

        ## Encrypted loss (only the terms that depend on the weights), used to monitor training
        encrypted_loss = np.sum(0.125 * encrypted_z_square - 0.5 * y * encrypted_z)

        ## Encrypted residual [d], shared by both gradients
        encrypted_d = 0.25 * encrypted_z - 0.5 * y

        dt.update({"encrypted_loss":encrypted_loss,"encrypted_d": encrypted_d})

        # A's own encrypted gradient
        encrypted_gradient_A = self.compute_encrypted_dJ_a(encrypted_d)

        # send_data needs the peer object, not its name.
        data_to_C = {"encrypted_loss":encrypted_loss,"encrypted_gradient_A":encrypted_gradient_A}
        self.send_data(data_to_C, self.other_client[client_C_name])

        data_to_B = {"encrypted_d":encrypted_d}
        self.send_data(data_to_B, self.other_client[client_B_name])

    ##step5
    def task_2(self):
        """
        Step 5: apply the plaintext gradient received from C to A's weights.
        """
        dt = self.data
        assert "gradient_A" in dt.keys(), "Error: 'gradient_A' from C in step 4 not successfully received."
        self.update_weight(dt["gradient_A"])
        print(f"A weight: {self.weights}")


    



In [None]:
#参与方
class ClientB(Client):
    """
    Passive party B: owns a slice of the features and no labels.

    B sends its encrypted linear predictor to A, computes its encrypted
    gradient from the encrypted residual A returns, and applies the plaintext
    gradient decrypted by C.
    """
    def __init__(self, X, config):
        """
        :param X: B's training features, 2-D array (samples x features)
        :param config: the config dict; 'lr' and 'lambda' are read here
        """
        super().__init__(config)
        self.X = X
        self.weights = np.zeros(X.shape[1])

    def compute_z_b(self):
        """Return B's share of the linear predictor, X_B . w_B."""
        z_b = np.dot(self.X, self.weights)
        return z_b

    def compute_encrypted_dJ_b(self, encrypted_u):
        """
        Return B's encrypted gradient X_B^T . [d] + lambda * w_B.

        :param encrypted_u: array of encrypted residuals, one per sample
        """
        encrypted_dJ_b = self.X.T.dot(encrypted_u) + self.config['lambda'] * self.weights
        return encrypted_dJ_b

    def update_weight(self, dJ_b):
        """
        Gradient-descent step: w_B <- w_B - lr * dJ_b / n_samples.

        :param dJ_b: plaintext gradient for B's weights
        """
        self.weights = self.weights - self.config["lr"] * dJ_b / len(self.X)

    #step1
    def task_1(self,client_A_name):
        """
        Step 1: encrypt z_b = X_B . w_B and z_b^2 and send both to A.

        Encryption errors are no longer swallowed: previously the error was
        only printed and the method then crashed on an undefined name.

        :param client_A_name: name under which A was registered via connect()
        """
        dt = self.data
        assert "public_key" in dt.keys(), "Error: 'public_key' from C in step 1 not successfully received."
        pk = dt["public_key"]
        z_b = self.compute_z_b()
        z_b_square = z_b ** 2
        # phe encrypts one scalar at a time; float() avoids NumPy scalar types
        # that phe's encoder does not recognise.
        encrypted_z_b = np.asarray([pk.encrypt(float(x)) for x in z_b])
        encrypted_z_b_square = np.asarray([pk.encrypt(float(x)) for x in z_b_square])
        dt.update({"encrypted_z_b": encrypted_z_b})
        data_to_A = {"encrypted_z_b": encrypted_z_b,"encrypted_z_b_square":encrypted_z_b_square}
        self.send_data(data_to_A,self.other_client[client_A_name])

    #step3
    def task_2(self,client_C_name):
        """
        Step 3: compute B's encrypted gradient from the encrypted residual
        received from A and send it to C for decryption.

        :param client_C_name: name under which C was registered via connect()
        """
        dt = self.data
        assert "encrypted_d" in dt.keys(), "Error: 'encrypted_d' from A in step3 not successfully received."

        encrypted_d = dt["encrypted_d"]
        encrypted_gradient_B = self.compute_encrypted_dJ_b(encrypted_d)

        data_to_C = {"encrypted_gradient_B":encrypted_gradient_B}

        # send_data needs the peer object, not its name.
        self.send_data(data_to_C, self.other_client[client_C_name])

    ##step5
    def task_3(self):
        """
        Step 5: apply the plaintext gradient received from C to B's weights.

        This method used to be a second `task_2`, which silently replaced the
        step-3 method of the same name; it now has its own name so both steps
        are callable.
        """
        dt = self.data
        assert "gradient_B" in dt.keys(), "Error: 'gradient_B' from C in step 4 not successfully received."
        self.update_weight(dt["gradient_B"])
        print(f"B weight: {self.weights}")
        
        

In [None]:
def load_data():
    """
    Load the breast-cancer dataset, split it into train and test parts
    (random_state=1) and standardize the features.

    The scaler is fitted on the training features only and then applied to
    both parts.

    :return: X_train, y_train, X_test, y_test
    """
    dataset = load_breast_cancer()
    features_train, features_test, labels_train, labels_test = train_test_split(
        dataset.data, dataset.target, random_state=1
    )
    scaler = StandardScaler().fit(features_train)
    return (
        scaler.transform(features_train),
        labels_train,
        scaler.transform(features_test),
        labels_test,
    )


## 将特征分配给A和B
def vertically_partition_data(X, X_test, A_idx, B_idx):
    """
    Split the feature columns between party A and party B.

    B's slices additionally get a leading column of ones; A's slices are
    returned unchanged.

    :param X: train feature
    :param X_test: test feature
    :param A_idx: feature index of party A
    :param B_idx: feature index of party B
    :return: train data for A, B; test data for A, B
    """
    def with_leading_ones(block):
        # Prepend a column of ones, one entry per row of `block`.
        return np.column_stack((np.ones(block.shape[0]), block))

    train_A, test_A = X[:, A_idx], X_test[:, A_idx]
    train_B = with_leading_ones(X[:, B_idx])
    test_B = with_leading_ones(X_test[:, B_idx])
    return train_A, train_B, test_A, test_B

In [None]:
def vertical_logistic_regression(X, y, X_test, y_test, config):
    """
    Start the processes of the three clients: A, B and C.

    Per epoch the protocol runs in this order:
      1. B encrypts its linear predictor and sends it to A.
      2. A computes the encrypted loss, residual and its encrypted gradient.
      3. B computes its encrypted gradient from the residual.
      4. C decrypts loss and gradients and returns the gradients.
      5. A and B update their local weights.

    :param X: features of the training dataset
    :param y: labels of the training dataset
    :param X_test: features of the test dataset
    :param y_test: labels of the test dataset
    :param config: the config dict ('A_idx', 'B_idx' and 'n_iter' are read here)
    :return: True
    """

    ## Split the features column-wise between A and B
    XA, XB, XA_test, XB_test = vertically_partition_data(X, X_test, config['A_idx'], config['B_idx'])
    print('XA:',XA.shape, '   XB:',XB.shape)

    ## Initialise the parties. Only A holds the labels; B gets features only.
    client_A = ClientA(XA, y, config)
    print("Client_A successfully initialized.")
    client_B = ClientB(XB, config)
    print("Client_B successfully initialized.")
    client_C =  ClientC(XA.shape, XB.shape, config)
    print("Client_C successfully initialized.")

    ## Connect every party to the other two
    client_A.connect("B", client_B)
    client_A.connect("C", client_C)
    client_B.connect("A", client_A)
    client_B.connect("C", client_C)
    client_C.connect("A", client_A)
    client_C.connect("B", client_B)

    ## Key generation is needed only once; the public key stays in A's and
    ## B's mailboxes for all epochs.
    client_C.task_1("A", "B")

    ## Training
    t = 0   # accumulated wall-clock time spent in the protocol steps
    for _ in range(config['n_iter']):
        stime = time.time()
        client_B.task_1("A")        # step 1: [z_b], [z_b^2] -> A
        client_A.task_1("B", "C")   # step 2: [loss], [grad_A] -> C, [d] -> B
        client_B.task_2("C")        # step 3: [grad_B] -> C
        client_C.task_2("A", "B")   # step 4: decrypt, gradients -> A, B
        client_A.task_2()           # step 5: A updates its weights
        client_B.task_3()           # step 5: B updates its weights
        t += time.time() - stime

        # Evaluate on the test split: scores are min-max scaled to [0, 1]
        # and rounded to obtain class predictions.
        y_pred = XA_test.dot(client_A.weights) + XB_test.dot(client_B.weights)
        scaler = MinMaxScaler(feature_range=(0, 1))
        normalized_arr = scaler.fit_transform(y_pred.reshape(-1, 1)).flatten()
        acc = accuracy_score(y_test, np.round(normalized_arr))
        print("******acc: ", acc, "******")
        print('f1-score:',f1_score(y_test,np.round(normalized_arr)))
        print('time:',t)

    print("All process done.")
    return True

In [None]:
config = {
    'n_iter': 10,                   # number of training epochs
    'lambda': 10,                   # regularisation coefficient
    'lr': 0.05,                     # learning rate
    'A_idx': list(range(10, 30)),   # feature columns held by A (10..29)
    'B_idx': list(range(10)),       # feature columns held by B (0..9)
}

# Breast-cancer dataset: binary classification (benign vs. malignant) with 30 numeric features.
X, y, X_test, y_test = load_data()
vertical_logistic_regression(X, y, X_test, y_test, config)