In [3]:
import numpy as np

class VerticalParty:
    """
    Represents a party holding a vertical partition of the data (a subset of features).

    Rows of the stored array are samples and columns are features. The array
    is column-centered once, at construction time.

    Attributes:
        name: Label used to identify this party in printed output.
        data: Column-centered copy of the input, shape (n_samples, n_features).
        n_samples: Number of rows in ``data``.
        n_features: Number of columns in ``data``.
    """
    def __init__(self, name, data):
        """
        Validate and center this party's block of features.

        Args:
            name: Label identifying the party.
            data: 2-D numpy array of shape (n_samples, n_features).

        Raises:
            TypeError: If ``data`` is not a numpy array.
            ValueError: If ``data`` is not 2-D or contains no samples.
        """
        if not isinstance(data, np.ndarray):
            raise TypeError("Data must be a numpy array.")
        # Fail early with a clear message instead of an opaque tuple-unpacking
        # error when the shape is read below.
        if data.ndim != 2:
            raise ValueError(
                f"Data must be a 2-D array of shape (n_samples, n_features); "
                f"got {data.ndim} dimension(s)."
            )
        # The column mean of zero rows is NaN, so centering would be meaningless.
        if data.shape[0] == 0:
            raise ValueError("Data must contain at least one sample.")
        self.name = name
        # Center the data upon initialization (subtraction allocates a new
        # array, so the caller's array is not modified).
        self.data = data - np.mean(data, axis=0)
        self.n_samples, self.n_features = self.data.shape

    def compute_local_covariance(self):
        """
        Computes the scatter matrix X^T X of this party's centered features.

        The result is proportional to the sample covariance matrix: no
        division by (n_samples - 1) is applied. The scaling does not affect
        the eigenvectors, only the eigenvalues.

        Returns:
            Array of shape (n_features, n_features).
        """
        print(f"Party {self.name}: Computing local covariance matrix.")
        return self.data.T @ self.data

class Coordinator:
    """
    Represents the server or coordinator that assembles the final matrix and runs PCA.

    Attributes:
        global_covariance: Assembled block matrix over all features of both
            parties, or None before ``run_vertical_pca`` has been called.
            It is built from unnormalized X^T X products.
        principal_components: Matrix whose columns are the leading
            eigenvectors from the last run, or None before the first run.
    """
    def __init__(self):
        self.global_covariance = None
        self.principal_components = None

    def securely_compute_cross_covariance(self, party_A, party_B):
        """
        *** SIMULATION ONLY ***
        This function simulates a secure multi-party computation protocol
        (e.g., using Homomorphic Encryption) to compute X_A^T * X_B.
        In a real-world scenario, this would be a complex cryptographic exchange.

        Args:
            party_A: Party object exposing a centered ``data`` array.
            party_B: Party object exposing a centered ``data`` array with the
                same number of rows as ``party_A``.

        Returns:
            Array of shape (features of A, features of B).

        Raises:
            ValueError: If the two parties hold a different number of samples.
        """
        rows_A = party_A.data.shape[0]
        rows_B = party_B.data.shape[0]
        # The product only makes sense when both parties hold the same
        # samples, row for row; report a mismatch explicitly rather than
        # letting the matrix product fail with a shape error.
        if rows_A != rows_B:
            raise ValueError(
                f"Parties must hold the same number of samples; "
                f"got {rows_A} and {rows_B}."
            )
        print("Coordinator: Securely computing cross-covariance (simulation)...")
        # In reality, parties would not share self.data.
        # This is a placeholder for the result of the secure protocol.
        cross_covariance = party_A.data.T @ party_B.data
        return cross_covariance

    def run_vertical_pca(self, party_A, party_B, n_components):
        """
        Orchestrates the entire vertically partitioned PCA process.

        Args:
            party_A: Party holding the first block of features.
            party_B: Party holding the second block of features.
            n_components: Number of leading components to keep. Must be
                non-negative; a value larger than the total number of
                features yields all available components.

        Returns:
            Array with one row per feature (A's features first, then B's)
            and one column per retained component, ordered by decreasing
            eigenvalue. The same array is stored on
            ``self.principal_components``.

        Raises:
            ValueError: If ``n_components`` is negative or the parties hold
                a different number of samples.
        """
        # A negative count would be read as a "drop the last k columns"
        # slice below and silently return the wrong number of components.
        if n_components < 0:
            raise ValueError(
                f"n_components must be non-negative; got {n_components}."
            )

        # 1. Get local covariances
        cov_AA = party_A.compute_local_covariance()
        cov_BB = party_B.compute_local_covariance()

        # 2. Get cross-covariance through secure computation
        cov_AB = self.securely_compute_cross_covariance(party_A, party_B)
        # The lower-left block is the transpose of the upper-right one.
        cov_BA = cov_AB.T

        # 3. Assemble the full covariance matrix
        print("Coordinator: Assembling global covariance matrix.")
        # np.block provides a clean way to build block matrices
        self.global_covariance = np.block([
            [cov_AA, cov_AB],
            [cov_BA, cov_BB]
        ])

        # 4. Perform PCA on the global matrix
        # eigh is used because the assembled matrix is symmetric.
        eigenvalues, eigenvectors = np.linalg.eigh(self.global_covariance)
        # Order the eigenvectors by decreasing eigenvalue.
        sorted_indices = np.argsort(eigenvalues)[::-1]
        self.principal_components = eigenvectors[:, sorted_indices][:, :n_components]
        print(f"Coordinator: PCA complete. Found {self.principal_components.shape[1]} principal components.")
        return self.principal_components

# --- Simulation ---

if __name__ == "__main__":
    # End-to-end demo: split a random matrix between two parties, run the
    # federated PCA, and check it against PCA on the unsplit matrix.

    # 1. Generate and vertically partition data
    print("--- 1. Data Generation and Partitioning ---")
    # Fixed seed so the printed components are reproducible between runs.
    np.random.seed(42)
    # 150 samples, 10 features total
    full_data = np.random.rand(150, 10)

    # Party A gets the first 4 features, Party B gets the next 6
    party_A_data = full_data[:, 0:4]
    party_B_data = full_data[:, 4:10]

    party_A = VerticalParty(name="A", data=party_A_data)
    party_B = VerticalParty(name="B", data=party_B_data)

    # 2. Coordinator runs the federated PCA process
    print("\n--- 2. Federated Vertical PCA Process ---")
    coordinator = Coordinator()
    n_principal_components = 3
    # Pass the same variable that the centralized baseline slices with below,
    # so the two results always have the same number of columns.
    federated_pcs = coordinator.run_vertical_pca(
        party_A, party_B, n_components=n_principal_components
    )
    print(f"\nFederated Principal Components (shape: {federated_pcs.shape}):\n", federated_pcs)

    # 3. Validation: Compare with centralized PCA
    print("\n--- 3. Validation with Centralized PCA ---")
    centralized_data = full_data - np.mean(full_data, axis=0)
    centralized_cov = centralized_data.T @ centralized_data
    eig_vals, eig_vecs = np.linalg.eigh(centralized_cov)
    sorted_idx = np.argsort(eig_vals)[::-1]
    centralized_pcs = eig_vecs[:, sorted_idx][:, :n_principal_components]
    print(f"Centralized Principal Components (shape: {centralized_pcs.shape}):\n", centralized_pcs)

    # Check if the results are close (they should be identical up to sign flips)
    # Raised explicitly rather than via `assert`, so the check still runs
    # when Python is started with -O.
    if not np.allclose(np.abs(federated_pcs), np.abs(centralized_pcs)):
        raise AssertionError(
            "Federated principal components do not match centralized PCA."
        )
    print("\n✅ Verification successful: Federated results match centralized PCA.")

--- 1. Data Generation and Partitioning ---

--- 2. Federated Vertical PCA Process ---
Party A: Computing local covariance matrix.
Party B: Computing local covariance matrix.
Coordinator: Securely computing cross-covariance (simulation)...
Coordinator: Assembling global covariance matrix.
Coordinator: PCA complete. Found 3 principal components.

Federated Principal Components (shape: (10, 3)):
 [[-0.42051181 -0.0658889  -0.48382616]
 [ 0.61660298 -0.01977591 -0.1280178 ]
 [ 0.44655481  0.09503789 -0.40856453]
 [-0.06088068  0.10191074 -0.47266772]
 [-0.25086277 -0.04910853 -0.40891301]
 [-0.00828997  0.36740283  0.11230176]
 [-0.35299572  0.07244922  0.17725084]
 [ 0.17023818 -0.39749757  0.20188641]
 [ 0.09521892  0.75222408 -0.06324287]
 [-0.11915709  0.33069444  0.32122695]]

--- 3. Validation with Centralized PCA ---
Centralized Principal Components (shape: (10, 3)):
 [[-0.42051181 -0.0658889  -0.48382616]
 [ 0.61660298 -0.01977591 -0.1280178 ]
 [ 0.44655481  0.09503789 -0.40856453