### sklearn例程:RBF核的顯式特征映射近似
具體來說，示例中展示了在使用SVM對數字數據集進行分類的情況下，如何使用RBFSampler和Nystroem來近似RBF核的特征映射。其中比較了使用原始空間中的線性SVM、使用近似映射和使用內核化SVM的結果。不同模型運行時間和精度的比較涉及：不同蒙特卡洛采樣樣本數量(對於RBFSampler，它使用隨機傅立葉特征)和訓練集的不同大小子集(用於Nystroem)。請注意，核近似的主要優勢在於性能提升，但這裏的數據集規模不足以顯示核近似的好處，因為精確的SVM仍然相當快。

對更多維度進行采樣顯然會帶來更好的分類結果，但代價更高。這意味著在運行時間和精度之間需要權衡，這由參數n_components給出。請注意，通過使用隨機梯度下降法（sklearn.linear_model.SGDClassifier）可以大大加快求解線性SVM以及近似核SVM的速度。對於有核函數的SVM，這是不容易實現的。
原文可以在官方文檔中找到。
#### 參考 https://vimsky.com/zh-tw/article/4471.html

In [2]:
# simple example
# https://scikit-learn.org/stable/modules/generated/sklearn.kernel_approximation.RBFSampler.html#sklearn.kernel_approximation.RBFSampler

from sklearn.kernel_approximation import RBFSampler
from sklearn.linear_model import SGDClassifier
# Four 2-D points labelled in an XOR pattern.
X = [[0, 0], [1, 1], [1, 0], [0, 1]]
y = [0, 0, 1, 1]
# Map the points through the approximate RBF feature map (gamma=1, fixed seed).
rbf_feature = RBFSampler(gamma=1, random_state=1)
X_features = rbf_feature.fit_transform(X)
# Fit a linear classifier with SGD on the mapped features.
# NOTE(review): the classifier itself is not seeded, so results may vary between runs.
clf = SGDClassifier(max_iter=5, tol=1e-3)
clf.fit(X_features, y)
# Accuracy on the training points themselves (the recorded run shows 1.0).
clf.score(X_features, y)



1.0

### RBF Kernel

In [3]:
from sklearn.gaussian_process.kernels import RBF
# Kernel (Gram) matrix of a default-constructed RBF kernel for the two sample vectors.
print(RBF()([[1, 2, 3, 4], [1, 4, 5, 6]]))

# Sample standard deviation (ddof=1) taken over all eight values, printed for reference.
print(np.std([[1, 2, 3, 4], [1, 4, 5, 6]], ddof=1))

# The off-diagonal kernel value can be reproduced by hand from this formula:
# k(x_i, x_j) = exp(-1 / 2 d(x_i / length_scale, x_j / length_scale)^2)
a = np.array([1,2,3,4])
b = np.array([1,4,5,6])

# Squared distance expanded as a.a + b.b - 2 a.b; no length_scale division is applied here.
np.exp(- 1 / (2) * (a.dot(a) + b.dot(b) - 2 * a.dot(b)))

[[1.         0.00247875]
 [0.00247875 1.        ]]
1.8322507626258087


0.0024787521766663585

### RBFNet
https://shomy.top/2017/02/26/rbf-network/#radial-basis-function-%E4%BB%8B%E7%BB%8D

In [None]:
#!/usr/bin/env python
# encoding: utf-8
# 跑跑看 RBF 出來會是什麼
import numpy as np
from kmeans import KMeans

def getData(n=100, d=5):
    '''
    Draw a random regression data set whose target is the row sum.

    Returns a pair (X, y): X is an (n, d) array sampled uniformly between
    1.0 and 3.0, and y holds, for every row of X, the sum of its d entries.
    '''
    features = np.random.uniform(1., 3.0, (n, d))
    targets = features.sum(axis=1)
    return features, targets


class RBFNet(object):
    '''Radial basis function network.

    The centres of the hidden units come from a KMeans clustering of the
    training inputs; the hidden-to-output weights are obtained in closed
    form through a pseudo-inverse (linear least squares).
    '''
    def __init__(self, k=10, delta=0.1):
        '''
        k: number of centres, i.e. number of hidden units
        delta: spread parameter that multiplies the distance inside the
               exponential of the basis function
        '''
        self._delta = delta
        # Hidden-to-output weights; stays None until fit() has run.
        self._beta = None
        # Centres of the hidden units; stays None until fit() has run.
        self._centers = None
        self._hidden_num = k
        self.kms = KMeans(k)

    def _calRBF(self,x,c):
        '''
        Return the basis-function response exp(-delta * ||x - c||) for one
        sample x and one centre c.

        NOTE(review): the original description called this a Gaussian, but
        the Euclidean distance is not squared here - confirm which form is
        intended before changing it.
        '''
        return np.exp(-self._delta* np.sqrt(np.sum(np.square(x-c))))

    def _calG(self, X):
        '''
        Map the inputs to the hidden layer.

        X: 2-D array with one sample per row.
        Returns G with one row per sample and one column per hidden unit;
        G[i, j] is the response of sample i to centre j.
        '''
        num, dim = X.shape
        G = np.empty((num, self._hidden_num))
        for i in range(num):
            for j in range(self._hidden_num):
                # Response of sample i to centre j becomes hidden unit j's output.
                G[i,j] = self._calRBF(X[i,:], self._centers[j])

        return G

    def _calPseudoInvese(self,x):
        '''
        Return the Moore-Penrose pseudo-inverse of matrix x.
        '''
        return np.linalg.pinv(x)

    def fit(self, train_x, train_y):
        '''
        Train the network.

        train_x: 2-D array of training inputs, one sample per row.
        train_y: targets, one per sample.
        '''

        # Unpacking doubles as a check that train_x is two-dimensional.
        num, dim = train_x.shape

        # Choose the centres without supervision via KMeans.
        self.kms.train(train_x)
        self._centers = self.kms._centers
        # Hidden-layer activations of the training set.
        self.G = self._calG(train_x)

        # Least-squares weights: pinv(G^T G) G^T y.
        self._beta = self._calPseudoInvese(np.dot(np.transpose(self.G), self.G))
        self._beta = np.dot(self._beta, np.transpose(self.G))
        self._beta = np.dot(self._beta, train_y)

    def predict(self, test_x):
        '''
        Predict the output for one sample or for several samples.

        test_x: a 1-D sample or a 2-D batch (array or array-like).
        Returns one prediction per sample.
        Raises AttributeError if the network has not been fitted, and
        TypeError if test_x cannot be converted to an array.
        '''

        # Fail with a clear message instead of an opaque error deep in _calG.
        if getattr(self, '_beta', None) is None or \
                getattr(self, '_centers', None) is None:
            raise AttributeError(
                'RBFNet is not fitted yet; call fit() before predict()')

        if not isinstance(test_x, np.ndarray):
            try:
                test_x = np.asarray(test_x)
            except Exception:
                # Catch Exception rather than everything, so KeyboardInterrupt
                # and SystemExit are not turned into a TypeError.
                raise TypeError('np.ndarray is necessary')
        if test_x.ndim == 1:
            # Treat a single sample as a batch of one row.
            test_x = test_x.reshape(1, -1)

        # Hidden-layer activations of the inputs, i.e. phi(X).
        G = self._calG(test_x)

        # Linear read-out.
        Y = np.dot(G, self._beta)
        return Y

def main():
    '''
    Fit an RBFNet on 100 random 5-D samples, then print five fresh test
    inputs, their true targets and the network's predictions.
    '''
    data = getData(100,5)
    rbf = RBFNet()
    rbf.fit(*data)

    test_data = getData(5, 5)
    # print() calls instead of Python 2 print statements, which are
    # syntax errors on Python 3.
    print(test_data[0])
    print('result', test_data[1])
    print('prediction', rbf.predict(test_data[0]))

In [6]:
def getData(n=100, d=5):
    '''
    Draw a random regression data set whose target is the row sum.

    Returns a pair (X, y): X is an (n, d) array sampled uniformly between
    1.0 and 3.0, and y holds, for every row of X, the sum of its d entries.
    '''
    samples = np.random.uniform(1., 3.0, (n, d))
    row_sums = samples.sum(axis=1)
    return samples, row_sums

def _calRBF(self,x,c):
    '''
    Evaluate the radial basis response of sample x with respect to centre c.

    Returns exp(-self._delta * dist), where dist is the square root of the
    summed squared differences between x and c.
    '''
    offset = x - c
    distance = np.sqrt(np.sum(np.square(offset)))
    return np.exp(-self._delta * distance)

def _calG(self, X):
    '''
    Build the hidden-layer activation matrix for the samples in X.

    Entry (i, j) of the result is self._calRBF(X[i, :], self._centers[j]);
    the matrix has one row per sample and self._hidden_num columns.
    '''
    n_samples, _ = X.shape
    n_hidden = self._hidden_num
    activations = np.empty((n_samples, n_hidden))
    # ndindex walks (row, col) pairs row by row, like the nested loops did.
    for row, col in np.ndindex(n_samples, n_hidden):
        activations[row, col] = self._calRBF(X[row, :], self._centers[col])
    return activations

In [2]:
# Display one random draw with the default sizes (n=100, d=5).
getData()

(array([[2.83465053, 1.13636439, 1.27499729, 2.35188578, 1.22781898],
        [1.89588048, 1.47420873, 2.85801041, 2.42745796, 2.31387373],
        [2.33692137, 2.11525192, 1.29027281, 1.79582357, 2.12283291],
        [2.82361391, 1.00765947, 1.43939999, 1.58509882, 1.11217758],
        [2.29457448, 2.88252926, 1.27028803, 1.67573354, 1.64737119],
        [1.86108467, 2.3226627 , 1.97433589, 2.24925376, 1.7978214 ],
        [2.02148662, 1.60850219, 2.27587221, 1.86823138, 1.1308958 ],
        [1.37286194, 2.01385487, 1.33922153, 2.29292557, 1.27146645],
        [2.12510422, 2.0070134 , 2.76114481, 2.39176686, 1.82305712],
        [2.26442507, 1.90039189, 2.893561  , 1.46979073, 2.07136872],
        [1.06279562, 1.86738004, 1.14171456, 1.14287074, 1.45078407],
        [2.28784079, 1.2217599 , 1.66989646, 2.48628354, 2.51611588],
        [2.76215947, 2.14054504, 1.46484359, 1.72556211, 2.05417548],
        [2.08380277, 1.49639292, 2.46582785, 1.6110756 , 1.19873971],
        [1.88271646,

In [None]:
 def fit(self, train_x, train_y):
    '''
    Fit the network on the training inputs and targets.

    Centres are taken from self.kms after it has been trained on train_x;
    the output weights are then computed as pinv(G^T G) G^T train_y, where
    G is the hidden-layer matrix returned by self._calG.
    '''

    # Unpacking doubles as a check that train_x is two-dimensional.
    n_samples, n_features = train_x.shape

    # Unsupervised step: let KMeans pick the centres.
    kmeans = self.kms
    kmeans.train(train_x)
    self._centers = kmeans._centers

    # Hidden-layer activations of the training set.
    hidden = self._calG(train_x)
    self.G = hidden
    hidden_t = np.transpose(hidden)

    # Weight computation, which involves a pseudo-inverse.
    self._beta = self._calPseudoInvese(np.dot(hidden_t, hidden))
    self._beta = np.dot(self._beta, hidden_t)
    self._beta = np.dot(self._beta, train_y)

In [None]:
def _calRBF(x, c, delta=0.1):
    '''
    Return the basis-function response exp(-delta * ||x - c||) for one
    sample x and one centre c.

    delta: spread parameter; the default 0.1 matches RBFNet's default.
    This is a module-level function, so it takes delta as an argument
    instead of reading it from an instance.
    '''
    return np.exp(-delta * np.sqrt(np.sum(np.square(x - c))))

train_x, train_y = getData(100,5)
num, dim = train_x.shape

hidden_num = 10

# Centres are picked the same way RBFNet.fit does: KMeans on the training inputs.
kms = KMeans(hidden_num)
kms.train(train_x)
centers = kms._centers

G = np.empty((num, hidden_num))  # 100 x 10
for i in range(num):
    for j in range(hidden_num):
        # Response of sample i to centre j becomes hidden unit j's output.
        G[i,j] = _calRBF(train_x[i,:], centers[j])

In [None]:
# 跑跑看上面的 RBF 跑出來會是什麼

### scipy.interpolate.Rbf
https://docs.scipy.org/doc/scipy/reference/generated/scipy.interpolate.Rbf.html

In [2]:
from scipy.interpolate import Rbf
# Four rows of 50 uniform random numbers: coordinates x, y, z and values d.
x, y, z, d = np.random.rand(4, 50)
# NOTE(review): SciPy documents Rbf as legacy in favour of RBFInterpolator - confirm before reusing.
rbfi = Rbf(x, y, z, d)  # radial basis function interpolator instance
# The same 20 evenly spaced values in [0, 1] are used for all three coordinates.
xi = yi = zi = np.linspace(0, 1, 20)
di = rbfi(xi, yi, zi)   # interpolated values
# One interpolated value per query point (the recorded run shows (20,)).
di.shape

(20,)

In [3]:
# Display the random coordinates and values that were passed to Rbf.
x, y, z, d

(array([0.58936285, 0.63543151, 0.49643903, 0.2847877 , 0.48802029,
        0.38452706, 0.02269027, 0.12439432, 0.02988578, 0.88652587,
        0.01280844, 0.87617396, 0.0350037 , 0.59305837, 0.96076452,
        0.18575337, 0.75032287, 0.180105  , 0.26565007, 0.11431715,
        0.64813214, 0.55198703, 0.17796003, 0.38569784, 0.01453455,
        0.67025737, 0.96205462, 0.6176514 , 0.74761426, 0.9680147 ,
        0.38519579, 0.54768738, 0.16014638, 0.65111206, 0.65099098,
        0.61168268, 0.7407471 , 0.24584517, 0.38993566, 0.97426045,
        0.82705371, 0.8030124 , 0.56062956, 0.94295753, 0.43251955,
        0.31603041, 0.23570591, 0.57939462, 0.90870281, 0.98184209]),
 array([0.75181651, 0.01399039, 0.68034713, 0.49437106, 0.50341996,
        0.24031758, 0.35955041, 0.34653131, 0.13382373, 0.1771924 ,
        0.7578941 , 0.38067091, 0.25225757, 0.78025513, 0.36766739,
        0.48747424, 0.10171739, 0.10251626, 0.78106374, 0.35069561,
        0.12516761, 0.07765133, 0.1909478 , 0.