# Kernel-PCA的一般步骤
（1） 给定样本点$X$（维度$[N, D]$，共N个样本，每个样本D维）\
（2） 利用核函数计算样本两两之间的关系矩阵$K$（维度$[N, N]$） \
（3） 对$K$进行中心化 \
    $\tilde K = K - I_N K - K I_N^T + I_N K I_N^T$，其中$I_N$为元素全为$\frac{1}{N}$的$N \times N$矩阵（并非单位矩阵）\
（4） 对$\tilde K$进行特征值分解，获取最大的d个特征值${\lambda _i}$以及对应的特征向量$u_i$，并对$u_i$进行归一化 \
    ${{u'}_i} = \frac{1}{{\sqrt {{\lambda _i}} }}{u_i}$ \
（5） 收集d个${u'}_i$，生成降维矩阵$u'$，$u' \in [d,N]$，进行降维 \
$x' = \tilde K{{u'}^T}, x' \in [N,d]$

In [1]:
 # 导包
import numpy as np 
import matplotlib.pyplot as plt 

# Sigmoid kernel
def sigmoid(x1, x2, a = 0.25, r = 1):
    """Return the sigmoid kernel value ``tanh(a * <x1, x2> + r)``.

    Args:
        x1: First input, passed to ``np.dot`` as the left operand.
        x2: Second input, passed to ``np.dot`` as the right operand.
        a: Scale applied to the dot product.
        r: Offset added before the ``tanh``.

    Returns:
        ``np.tanh`` of the scaled and shifted dot product.
    """
    return np.tanh(a * np.dot(x1, x2) + r)
# Polynomial kernel (reduces to the plain linear kernel with the defaults)
def linear(x1, x2, a = 1, c = 0, d = 1):
    """Return the polynomial kernel value ``(a * <x1, x2> + c) ** d``.

    With the default ``a=1, c=0, d=1`` this is just the dot product of
    ``x1`` and ``x2``.

    Args:
        x1: First input, passed to ``np.dot`` as the left operand.
        x2: Second input, passed to ``np.dot`` as the right operand.
        a: Scale applied to the dot product.
        c: Constant added before exponentiation.
        d: Exponent (degree of the polynomial).

    Returns:
        The kernel value computed by ``np.power``.
    """
    x = np.dot(x1, x2)
    # The result must be returned; without it every call evaluates to None.
    return np.power((a * x + c), d)
# Gaussian / RBF (radial basis function) kernel
def rbf(x1, x2, gamma = 10):
    """Return ``exp(-gamma * dot(x1 - x2, x1 - x2))``.

    For 1-D vectors the dot product of the difference with itself is the
    squared Euclidean distance between ``x1`` and ``x2``.

    Args:
        x1: First input; must support ``x1 - x2``.
        x2: Second input.
        gamma: Multiplier of the squared distance inside the exponential.

    Returns:
        The kernel value computed by ``np.exp``.
    """
    diff = x1 - x2
    return np.exp(-gamma * np.dot(diff, diff))

In [2]:
def kpca(data, n_dims = 2, kernel = rbf):
    """Reduce ``data`` to ``n_dims`` dimensions with kernel PCA.

    Args:
        data: 2-D array; its shape is unpacked as ``[N, D]`` (N rows are
            treated as samples).
        n_dims: Number of leading components to keep.
        kernel: Callable ``kernel(row_i, row_j)`` returning a scalar. It
            must be symmetric in its two arguments, because the centred
            Gram matrix is decomposed with ``np.linalg.eigh``.

    Returns:
        Array of shape ``[N, n_dims]``: the centred Gram matrix projected
        onto the scaled leading eigenvectors. Components whose eigenvalue
        is not positive are returned as all-zero columns.
    """
    data = np.asarray(data)
    N, _ = np.shape(data)

    # Gram matrix: kernel evaluated on every pair of rows.
    K = np.zeros([N, N])
    for i in range(N):
        for j in range(N):
            K[i, j] = kernel(data[i], data[j])

    # Centre K. The shape must be passed to np.ones as a tuple; a second
    # positional argument would be interpreted as the dtype.
    one_n = np.ones((N, N)) / N
    K = K - one_n.dot(K) - K.dot(one_n) + one_n.dot(K).dot(one_n)

    # eigh is the solver for symmetric matrices: it returns real
    # eigenvalues/eigenvectors, whereas eig may return complex values.
    eig_values, eig_vector = np.linalg.eigh(K)

    # Keep the n_dims largest eigenvalues and their eigenvectors.
    idx = np.argsort(-eig_values)[:n_dims]
    eigval = eig_values[idx]
    eigvector = eig_vector[:, idx]  # [N, d]

    # Scale each eigenvector by 1 / sqrt(eigenvalue). Centring always
    # yields a (numerically) zero eigenvalue and rounding can make it
    # slightly negative, so only positive eigenvalues are used; the other
    # columns stay zero instead of becoming NaN/inf.
    positive = eigval > 0
    u = np.zeros_like(eigvector)  # [N, d]
    u[:, positive] = eigvector[:, positive] / eigval[positive] ** (1 / 2)

    # Project the centred Gram matrix onto the scaled eigenvectors.
    data_n = np.dot(K, u)
    return data_n

In [3]:
def draw_pca(data, labs):
    """Scatter-plot the first two columns of ``data``, one colour per label.

    Args:
        data: 2-D array; columns 0 and 1 are used as the x and y
            coordinates.
        labs: One label per row of ``data``. Each distinct label gets its
            own colour from the ``Spectral`` colormap and a legend entry.
    """
    data = np.asarray(data)
    labs = np.asarray(labs).ravel()

    plt.cla()
    unique_labs = np.unique(labs)
    colors = [plt.cm.Spectral(each)
              for each in np.linspace(0, 1, len(unique_labs))]

    handles = []
    legends = []
    for lab, color in zip(unique_labs, colors):
        # Boolean mask of the rows carrying this label (a comparison, not
        # a keyword argument to np.where).
        mask = labs == lab
        # `color=` takes a single RGBA tuple unambiguously; `c=` could
        # mistake a 4-tuple for four scalar values.
        handle = plt.scatter(data[mask, 0], data[mask, 1], color=color)
        handles.append(handle)
        legends.append(lab)

    plt.legend(handles, legends)
    plt.show()