First let's import the required libraries.

In [1]:
import numpy as np
import cvxopt 
import cvxopt.solvers
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from mpl_toolkits import mplot3d
from IPython.display import HTML

In [2]:
iris = load_iris()
X, y = iris["data"], iris["target"]

In [3]:
plt.ioff()
fig = plt.figure()
ax = plt.axes(projection ="3d")

def rot(deg):
    ax.view_init(30, deg)

ax.scatter3D(*X[y==0,:3].T, color="green")
ax.scatter3D(*X[y==1,:3].T, color="red")
ax.scatter3D(*X[y==2,:3].T, color="blue")

ani = animation.FuncAnimation(fig, rot, frames=360, interval=8)
plt.close()
HTML(ani.to_html5_video())

In [38]:
from itertools import combinations
from sklearn.preprocessing import StandardScaler

class RBF_SVC_OVR(BaseEstimator, ClassifierMixin):
    
    def K(self, a, b):
        return np.exp(-self.gamma*np.linalg.norm(a-b)**2)
    
    def __init__(self, gamma = 1.0, C = 1.0, scale_input = True, support_tolerance = 1e-3):
        self.C = C
        if gamma != "scale":
            self.gamma = gamma
        else:
            self.gamma = None
        self.scale_input = scale_input
        self.support_tolerance = support_tolerance
        
    def predict(self, X):
        if self.scale_input:
            X = self.scaler.transform(X)
        
        #X_b = np.c_[np.ones((X.shape[0], 1)), X]
        X_b = X
                
        row_scores_clf = []
        
        for row in X_b:
            row_scores_clf.append((row, -float("inf"), list(self.classes.keys())[0]))
            
        
        
        for c, (a, t, bias) in self.classes.items():
            for i, (row, score, _) in enumerate(row_scores_clf):
                sample = np.matrix(row)
                s = 0
                for aval, tval, train_sample in zip(a, t, self.xtrain):
                    #print(sample, train_sample)
                    if aval <= self.support_tolerance:
                        continue
                    s += aval*tval*self.K(sample, np.matrix(train_sample))
                res = s + bias
                if res > score:
                    row_scores_clf[i] = (row, res, c)
         
        return [c for (_, _, c) in row_scores_clf]                                         
        
    def fit(self, X, y):
        if self.scale_input:
            self.scaler = StandardScaler()
            self.scaler.fit(X)
            X = self.scaler.transform(X)
        #add bias
        #X_b = np.c_[np.ones((X.shape[0], 1)), X]
        X_b = X
        if self.gamma == None:
            self.gamma = 1/X_b.shape[1]
        
        self.xtrain = X_b
        #find the classes
        self.classes = {}
        for c in y:
            self.classes[c] = None
        #create kernel matrix
        ker_mat = np.zeros((X.shape[0], X.shape[0]))
        for i in range(X.shape[0]):
            for j in range(i, X.shape[0]):
                k = self.K(X[i,:],X[j,:])
                ker_mat[i,j] = k
                ker_mat[j,i] = k
                
        #generate OvR parameters
        for c in self.classes.keys():
            #generate y
            ytrgt = -np.ones((y.shape[0], 1))
            ytrgt[y == c] = 1
            
            trgt_map = ytrgt @ ytrgt.T
            
            P = np.multiply(trgt_map, ker_mat)
            q = -np.ones((y.shape[0], 1))
            h = np.hstack([np.zeros_like(y).reshape(-1,), np.full((y.shape[0],), self.C)]).reshape(-1, 1)
            
            G = np.zeros((2*y.shape[0], y.shape[0]))
            np.fill_diagonal(G, -1)
            np.fill_diagonal(G[y.shape[0]:,:], 1)
                                     
            A = ytrgt.T
            
            cvxopt.solvers.options['show_progress'] = False
            
            a_hat = cvxopt.solvers.qp(
                q = cvxopt.matrix(q), #to substract all a values
                P = cvxopt.matrix(P),
                h = cvxopt.matrix(h),
                G = cvxopt.matrix(G),
                b = cvxopt.matrix(np.zeros((1,1))),
                A = cvxopt.matrix(A)
            )     
            
            #calculate bias for the class
            n_s = 0
            s = 0                   
            
            for i, (a_i, t_i) in enumerate(zip(np.matrix(a_hat["x"]), ytrgt)):
                if a_i <= self.support_tolerance:
                    continue
                n_s += 1
                for j, (a_j, t_j) in enumerate(zip(np.matrix(a_hat["x"]), ytrgt)):
                    if a_j <= self.support_tolerance:
                        continue
                    s += t_i -a_j*t_j*ker_mat[i,j]
            
            bias = s / n_s
            self.classes[c] = (np.matrix(a_hat["x"]), ytrgt, bias)
        
        return self

In [98]:
from sklearn.model_selection import train_test_split

In [139]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [190]:
rbf_svm = RBF_SVC_OVR(support_tolerance=1e-2, C = 2.5, gamma=1e-1)

In [191]:
rbf_svm.fit(X_train, y_train)

RBF_SVC_OVR(C=2.5, gamma=0.1, support_tolerance=0.01)

In [192]:
y_pred = rbf_svm.predict(X_test)

In [193]:
from sklearn.metrics import accuracy_score

In [194]:
accuracy_score(y_pred, y_test)

1.0

So with some hyperparameter tuning I was able to achieve a perfect score on this split.

Some more hyperparameter exploring goes on below.

In [891]:
from sklearn.model_selection import GridSearchCV

clf = RBF_SVC_OVR()

clf_params = {
    "support_tolerance" : [1e-12, 1e-10],
    "gamma" : np.logspace(-5, 5, 10),
    "C" : np.logspace(-5, 5, 10)
}

clf_grid = GridSearchCV(clf, clf_params, cv=3, n_jobs=8, scoring="accuracy", verbose=2)

In [892]:
clf_grid.fit(X_train, y_train)

Fitting 3 folds for each of 200 candidates, totalling 600 fits


GridSearchCV(cv=3, estimator=RBF_SVC_OVR(), n_jobs=8,
             param_grid={'C': array([1.00000000e-05, 1.29154967e-04, 1.66810054e-03, 2.15443469e-02,
       2.78255940e-01, 3.59381366e+00, 4.64158883e+01, 5.99484250e+02,
       7.74263683e+03, 1.00000000e+05]),
                         'gamma': array([1.00000000e-05, 1.29154967e-04, 1.66810054e-03, 2.15443469e-02,
       2.78255940e-01, 3.59381366e+00, 4.64158883e+01, 5.99484250e+02,
       7.74263683e+03, 1.00000000e+05]),
                         'support_tolerance': [1e-12, 1e-10]},
             scoring='accuracy', verbose=2)

In [894]:
clf_grid.best_estimator_

RBF_SVC_OVR(C=100000.0, gamma=0.021544346900318846, support_tolerance=1e-12)

In [895]:
clf_params = {
    "support_tolerance" : [1e-12],
    "gamma" : np.logspace(-3, 1, 10),
    "C" : np.logspace(0, 4, 10)
}

clf_grid = GridSearchCV(clf, clf_params, cv=3, n_jobs=8, scoring="accuracy", verbose=2)

In [896]:
clf_grid.fit(X_train, y_train)

Fitting 3 folds for each of 100 candidates, totalling 300 fits


GridSearchCV(cv=3, estimator=RBF_SVC_OVR(), n_jobs=8,
             param_grid={'C': array([1.00000000e+00, 2.78255940e+00, 7.74263683e+00, 2.15443469e+01,
       5.99484250e+01, 1.66810054e+02, 4.64158883e+02, 1.29154967e+03,
       3.59381366e+03, 1.00000000e+04]),
                         'gamma': array([1.00000000e-03, 2.78255940e-03, 7.74263683e-03, 2.15443469e-02,
       5.99484250e-02, 1.66810054e-01, 4.64158883e-01, 1.29154967e+00,
       3.59381366e+00, 1.00000000e+01]),
                         'support_tolerance': [1e-12]},
             scoring='accuracy', verbose=2)

In [897]:
clf_grid.best_score_

0.8083333333333332

In [898]:
clf_grid.best_estimator_

RBF_SVC_OVR(C=10000.0, gamma=0.021544346900318832, support_tolerance=1e-12)

In [1101]:
clf_params = {
    "support_tolerance" : [1e-5, 1e-3, 1e-1],
    "gamma" : np.logspace(-4, -2, 20),
    "C" : np.linspace(0.5, 2.5, 10)
}

clf_grid = GridSearchCV(clf, clf_params, cv=3, n_jobs=8, scoring="accuracy", verbose=2)

In [1102]:
clf_grid.fit(X_train, y_train)

Fitting 3 folds for each of 600 candidates, totalling 1800 fits


GridSearchCV(cv=3, estimator=RBF_SVC_OVR(), n_jobs=8,
             param_grid={'C': array([0.5       , 0.72222222, 0.94444444, 1.16666667, 1.38888889,
       1.61111111, 1.83333333, 2.05555556, 2.27777778, 2.5       ]),
                         'gamma': array([0.0001    , 0.00012743, 0.00016238, 0.00020691, 0.00026367,
       0.00033598, 0.00042813, 0.00054556, 0.00069519, 0.00088587,
       0.00112884, 0.00143845, 0.00183298, 0.00233572, 0.00297635,
       0.00379269, 0.00483293, 0.00615848, 0.0078476 , 0.01      ]),
                         'support_tolerance': [1e-05, 0.001, 0.1]},
             scoring='accuracy', verbose=2)

In [1103]:
clf_grid.best_score_

0.7833333333333333

In [1105]:
clf_grid.best_params_

{'C': 1.3888888888888888,
 'gamma': 0.007847599703514606,
 'support_tolerance': 0.001}

In [1116]:
clf_params = {
    "support_tolerance" : [1e-4, 1e-3, 1e-2],
    "gamma" : np.logspace(-7, -4, 20),
    "C" : np.linspace(1.2, 1.5, 10)
}

clf_grid = GridSearchCV(clf, clf_params, cv=3, n_jobs=8, scoring="accuracy", verbose=2)

In [1117]:
clf_grid.fit(X_train, y_train)

Fitting 3 folds for each of 600 candidates, totalling 1800 fits


GridSearchCV(cv=3, estimator=RBF_SVC_OVR(), n_jobs=8,
             param_grid={'C': array([1.2       , 1.23333333, 1.26666667, 1.3       , 1.33333333,
       1.36666667, 1.4       , 1.43333333, 1.46666667, 1.5       ]),
                         'gamma': array([1.00000000e-07, 1.43844989e-07, 2.06913808e-07, 2.97635144e-07,
       4.28133240e-07, 6.15848211e-07, 8.85866790e-07, 1.27427499e-06,
       1.83298071e-06, 2.63665090e-06, 3.79269019e-06, 5.45559478e-06,
       7.84759970e-06, 1.12883789e-05, 1.62377674e-05, 2.33572147e-05,
       3.35981829e-05, 4.83293024e-05, 6.95192796e-05, 1.00000000e-04]),
                         'support_tolerance': [0.0001, 0.001, 0.01]},
             scoring='accuracy', verbose=2)

In [1118]:
clf_grid.best_score_

0.7166666666666667

In [1119]:
clf_grid.best_params_

{'C': 1.5, 'gamma': 0.0001, 'support_tolerance': 0.001}

Let's try a blind approach with bias squaring.

In [1127]:
rbf_svm = RBF_SVC_OVR(support_tolerance=1e-3)

In [1128]:
rbf_svm.fit(X_train, y_train, square_bias=True)

RBF_SVC_OVR()

In [1129]:
y_pred = rbf_svm.predict(X_test)

In [1130]:
accuracy_score(y_pred, y_test)

0.9666666666666667