# In [None]:
import numpy as np

from sklearn import svm
from sklearn.base import BaseEstimator, ClassifierMixin # baseestimator估计器的基类，classifierMixin分类器的混合类 

class RBF(object):
    """Gaussian (RBF) base kernel: ``exp(-gamma * ||x - y||^2)``.

    Parameters
    ----------
    gamma : float
        Scale applied to the squared Euclidean distance inside the exponent.
    """

    def __init__(self, gamma):
        self.gamma = gamma

    def __call__(self, X, Y=None):
        """Return the Gram matrix between the rows of X and the rows of Y.

        When Y is omitted the kernel is evaluated between X and itself.
        """
        # Squared row norms of X as a column vector, shape (n_x, 1).
        x_sq = np.sum(X * X, axis=1)[:, np.newaxis]
        if Y is None:
            other = X
            y_sq = x_sq.T  # reuse the norms already computed for X
        else:
            other = Y
            # Squared row norms of Y as a row vector, shape (1, n_y).
            y_sq = np.sum(Y * Y, axis=1)[np.newaxis, :]

        # ||x - y||^2 = ||x||^2 + ||y||^2 - 2 <x, y>, built with broadcasting.
        sq_dist = x_sq + y_sq
        sq_dist -= 2 * np.dot(X, other.T)
        # Clamp the tiny negative values that rounding can produce.
        sq_dist = np.maximum(sq_dist, 0)
        return np.exp(-self.gamma * sq_dist)
    
def linear(X, Y=None):
    """Linear kernel: matrix of inner products between rows of X and rows of Y.

    When Y is omitted the Gram matrix of X with itself is returned.
    """
    other = X if Y is None else Y
    return np.dot(X, other.T)


class MultiKernelLinear(object):
    """Weighted sum of base kernels, usable as a callable kernel.

    Parameters
    ----------
    kernels : sequence of callables
        Base kernels; each is called as ``kernel(X)`` and ``kernel(X, Y)``.
    gammas : sequence of float
        One weight per base kernel. Only strictly positive weights
        contribute to the combination.
    X : array-like, optional
        Reference data. When given, every base kernel is evaluated on it
        once and the results are cached in ``Ks``.
    kernel : str, optional (default='precomputed')
        Stored as-is; ``_precomputed`` reports whether it equals
        ``"precomputed"``.

    Attributes
    ----------
    Ks : list or None
        Cached ``kernel(X)`` matrices for the reference data, or None when
        no reference data was supplied.
    """

    def __init__(self, kernels, gammas, X=None, kernel='precomputed'):
        self.kernels = kernels
        self.gammas = gammas
        self.X = X
        self.kernel = kernel
        self.Ks = None
        if X is not None:
            # Evaluate each base kernel on the reference data once so that
            # repeated calls on the same object do not recompute them.
            self.Ks = [base_kernel(X) for base_kernel in kernels]

    def __call__(self, X, Y=None):
        """Construct the kernel by linear combination of the base kernels."""
        K = 0
        # The cache is only valid for the exact object it was built from,
        # hence the identity (not equality) checks.
        use_cache = (
            self.Ks is not None
            and X is self.X
            and (Y is X or Y is None)
        )
        if use_cache:
            for gamma, Ki in zip(self.gammas, self.Ks):
                if gamma > 0.0:
                    K += gamma * Ki
        else:
            for gamma, base_kernel in zip(self.gammas, self.kernels):
                if gamma > 0.0:
                    K += gamma * base_kernel(X, Y)
        return K

    @property
    def _precomputed(self):
        """True when the stored ``kernel`` tag is ``"precomputed"``."""
        return self.kernel == "precomputed"
    

class MultiKernelSVC(BaseEstimator, ClassifierMixin):
    """Lp-norm Multiple Kernel Learning (MKL) classifier, two classes only.

    Alternates between fitting an SVC on a weighted sum of the base kernels
    and re-estimating the kernel weights from the SVC dual coefficients,
    until the weights stop changing or ``maxit`` iterations have run.

    Parameters
    ----------
    kernels : list of callables
        Base kernels. Each is called as ``kernel(X)`` and ``kernel(X, Y)``
        and must return the corresponding Gram matrix.

    p : float, optional (default=1)
        Exponent of the Lp norm used to normalise the kernel weights:
        after every update the weight vector has unit Lp norm.

    maxit : int, optional (default=10)
        Maximum number of SVC-fit / weight-update iterations (at least 1).

    C : float, optional (default=1)
        Penalty parameter C of the error term, passed to ``svm.SVC``.

    verbose : bool, optional (default=False)
        Print the kernel weights at each iteration and the final
        convergence status.

    tol : float, optional (default=1e-5)
        Iteration stops once no kernel weight changes by ``tol`` or more.

    store_objective : bool, optional (default=False)
        Record the SVM dual objective value at each iteration.

    Attributes
    ----------
    gammas_ : ndarray
        Kernel weights held by the combined kernel when fitting stopped.
    objective : list
        Dual objective per iteration (empty unless ``store_objective``).
    classes_ : ndarray
        The two class labels seen in ``fit``, in sorted order.
    """

    def __init__(self, kernels, p=1, maxit=10, C=1, verbose=False, tol=1e-5,
                 store_objective=False):
        self.kernels = kernels
        self.p = p
        self.maxit = maxit  # maximum number of iterations
        self.C = C
        self.verbose = verbose  # enable progress output
        self.tol = tol  # weight-change threshold that stops training
        self.store_objective = store_objective

    def fit(self, X, y, **params):
        """Fit the MKL and learn the kernel.

        Parameters
        ----------
        X : array-like
            Training samples, one per row.
        y : array-like
            Class labels; must contain exactly two distinct values.
        **params
            Estimator parameters, applied through ``set_params`` first.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``y`` does not contain exactly two classes or ``maxit`` is
            smaller than 1.
        """
        self.set_params(**params)

        X = np.atleast_2d(X)
        y = np.asarray(y).ravel()

        classes = np.unique(y)  # sorted distinct labels
        if classes.size != 2:
            raise ValueError(
                "MultiKernelSVC supports exactly 2 classes, got %d"
                % classes.size)
        self.classes_ = classes

        # Map labels to -1 / +1 in one step. Relabelling in two sequential
        # assignments merges both classes when the larger label is -1.
        y = np.where(y == classes[1], 1, -1)

        maxit = self.maxit
        if maxit < 1:
            raise ValueError("maxit must be at least 1, got %r" % (maxit,))

        p = float(self.p)
        kernels = self.kernels
        C = self.C
        n_kernels = len(kernels)

        # Uniform initial kernel weights with unit Lp norm.
        gammas = (1.0 / n_kernels) ** (1.0 / p) * np.ones(n_kernels)

        # Combined kernel; it caches each base kernel's Gram matrix on X.
        multi_kernel = MultiKernelLinear(kernels, gammas, X)
        Ks = multi_kernel.Ks

        objective = []

        for it in range(maxit):
            if self.verbose:
                print("Gammas : %s " % multi_kernel.gammas)

            svc = svm.SVC(kernel=multi_kernel, C=C)
            svc.fit(X, y)
            # Signed dual coefficients of the support vectors.
            dual_coef_ = svc.dual_coef_.ravel()
            # Indices of the support vectors within X; used directly as
            # row/column indices into the cached Gram matrices.
            support_ = np.asarray(svc.support_, dtype=int).ravel()

            # Per-kernel quantity gamma_i^2 * a^T K_i a over support vectors.
            norms = np.array([
                (gamma * gamma) * np.dot(
                    dual_coef_, np.dot(K[support_][:, support_], dual_coef_))
                for gamma, K in zip(multi_kernel.gammas, Ks)
            ])

            if self.store_objective:
                dual_obj = -0.5 * np.dot(
                    dual_coef_, np.dot(
                        multi_kernel(X[support_]), dual_coef_)) + (
                            dual_coef_ * y[support_]).sum()
                objective.append(dual_obj)

            # Update the kernel weights and rescale them to unit Lp norm.
            norms = norms ** (1.0 / (1.0 + p))
            scaling = np.sum(norms ** p) ** (1.0 / p)
            gammas_ = norms / scaling

            # Drop kernels whose weight is negligible next to the largest.
            gammas_[gammas_ < 1e-6 * gammas_.max()] = 0.0

            # Compare absolute changes so that a large decrease of a weight
            # is not mistaken for convergence.
            if np.abs(gammas_ - multi_kernel.gammas).max() < self.tol:
                if self.verbose:
                    print("Converged after %d interations" % it)
                break

            multi_kernel.gammas = gammas_
        else:
            # NOTE(review): on this path the last SVC was fitted before the
            # final weight update, while the kernel object it holds now
            # carries the updated weights - confirm this is intended.
            if self.verbose:
                print("Did NOT converge after %d interations" % it)

        self._svc = svc
        self.gammas_ = multi_kernel.gammas
        self.objective = objective
        return self

    def predict(self, X):
        """Predict class labels for the rows of X.

        Returns labels drawn from ``classes_`` (the labels given to
        ``fit``), not the internal -1 / +1 encoding.
        """
        signs = self._svc.predict(X)
        return np.where(signs == 1, self.classes_[1], self.classes_[0])