In [1]:
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [2]:
from sklearn.base import BaseEstimator
from sklearn.metrics.pairwise import pairwise_kernels

class MLS_SVR(BaseEstimator):
    
    def __init__(self, gamma, lamb, kernel):
        self.gamma, self.lamb, self.kernel = gamma, lamb, kernel

    def fit(self, X, y):
        n, m = y.shape
        K = pairwise_kernels(X, metric = self.kernel)
        H = np.matlib.repmat(K, m, m) + np.eye(n * m) / self.gamma

        P = np.zeros((m * n, m))
        for i in range(m):
            j1, j2 = n*i, n*(i+1)
            H[j1:j2, j1:j2] += K * (m / self.lamb)
            P[j1:j2, i] = 1

        eta = np.linalg.lstsq(H, P)[0]
        nu = np.linalg.lstsq(H, y.reshape(-1, 1))[0]

        S = P.T @ eta
        self.b = np.linalg.inv(S) @ eta.T @ y.reshape(-1, 1)
        self.alpha = (nu - eta @ self.b).reshape(n, m)
        
        
        self.X_train = X
        self.m = m

    def predict(self, X):
        n, m = X.shape[0], self.m
        
        K = pairwise_kernels(X, self.X_train, metric = self.kernel)
        return np.matlib.repmat(np.sum(K@self.alpha, axis=1).reshape(-1, 1), 1, m) + (K @ self.alpha) *(m/self.lamb) + np.matlib.repmat(self.b.reshape(1, -1), n, 1)

In [3]:
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

def rmse(model):
    return np.sqrt(-np.mean(cross_val_score(model, X, y, cv=CV, scoring="neg_mean_squared_error")))

CV = KFold(n_splits=3, shuffle=True, random_state=1234)

In [None]:
from sklearn.svm import LinearSVR
from sklearn.multioutput import MultiOutputRegressor

from datasets import get_datasets
datasets = get_datasets()



for name in datasets.keys():
    X, y = datasets[name]
    print("dataset : ", name, " . X shape : ", X.shape, " . y shape : ", y.shape)
    
    sksvr = []
    for C in [1e-3, 1e-2, 1e-2, 1, 10, 100, 1000, 10000]:
        print('.', end='')
        regr = MultiOutputRegressor(LinearSVR(C=C))
        sksvr.append(rmse(regr))
    print("\t Sklearn SVR:", np.amin(sksvr))

    mlssvr = []
    for gamma in [1e-3, 1e-2, 1e-1]:
        for lamb in [1, 10, 100]:
            print('.', end='')
            regr = MLS_SVR(gamma, lamb, "linear")
            mlssvr.append(rmse(regr))
            
    print("\t MLS_SVR:", np.amin(mlssvr))

dataset :  online_sales  . X shape :  (751, 1813)  . y shape :  (751, 12)
........	 Sklearn SVR: 18593.231707
....