In [2]:
import numpy as np
from sklearn import svm

In [1]:
 def load_X():
    return np.array([[1, 6], [1, 7], [2, 5], [2, 8],
                     [4, 2], [4, 3], [5, 1], [5, 2],
                     [5, 3], [6, 1], [6, 2], [9, 4],
                     [9, 7], [10, 5], [10, 6], [11, 6],
                     [5, 9], [5, 10], [5, 11], [6, 9],
                     [6, 10], [7, 10], [8, 11]]) 
 
def load_y():
    return np.array([1, 1, 1, 1,
                     2, 2, 2, 2, 2, 2, 2,
                     3, 3, 3, 3, 3,
                     4, 4, 4, 4, 4, 4, 4])


# One Vs All classifier

Be sure to remember that LinearSVC uses the one-against-all method by default,

In [3]:
# Create a simple dataset 
X = load_X() 
y = load_y() 

# Transform the 4 classes problem 
# in 4 binary classes problems. 
y_1 = np.where(y == 1, 1, -1) 
y_2 = np.where(y == 2, 1, -1) 
y_3 = np.where(y == 3, 1, -1) 
y_4 = np.where(y == 4, 1, -1)

In [8]:
# Train one binary classifier on each problem. 
y_list = [y_1, y_2, y_3, y_4] 
classifiers = [] 
for y_i in y_list:
    clf = svm.SVC(kernel='linear', C=1000) # regularized parameter
    clf.fit(X, y_i)     
    classifiers.append(clf) 

In [11]:
def predict_class(X, classifiers):
    predictions = np.zeros((X.shape[0], len(classifiers)))
    for idx, clf in enumerate(classifiers):
        predictions[:, idx] = clf.predict(X) 
 # returns the class number if only one classifier predicted it
# returns zero otherwise.
    return np.where((predictions == 1).sum(1) == 1,
                    (predictions == 1).argmax(axis=1) + 1,
                    0) 

In [14]:
predict_class(X,classifiers)

array([1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4,
       4], dtype=int64)

In [16]:
def predict_class(X, classifiers):
    predictions = np.zeros((X.shape[0], len(classifiers)))
    for idx, clf in enumerate(classifiers):
        predictions[:, idx] = clf.decision_function(X) 
 
    # return the argmax of the decision function as suggested by Vapnik.
    return np.argmax(predictions, axis=1) + 1 

In [18]:
predict_class(X,classifiers)

array([1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4,
       4], dtype=int64)

In [19]:
from sklearn.svm import LinearSVC 
 
X = load_X()
y = load_y() 
 
clf = LinearSVC(C=1000, random_state=88, multi_class='ovr')  #One Vs Rest default is One Vs All
clf.fit(X,y) 
 
# Make predictions on two examples. 
X_to_predict = np.array([[5,5],[2,5]])
print(clf.predict(X_to_predict)) # prints [2 1] 

[2 1]


# One against One

 As a result, we train one classifier per pair of classes, which leads to K(K-1)/2 classifiers for K classes. Each classifier is trained on a subset of the data and produces its own decision boundary
 
 Predictions are made using a simple voting strategy. Each example we wish to predict is passed to each classifier, and the predicted class is recorded. Then, the class having the most votes is assigned to the example 
 

In [22]:
from itertools import combinations
from scipy.stats import mode
 
# Predict the class having the max number of votes.
def predict_class(X, classifiers, class_pairs):
    predictions = np.zeros((X.shape[0], len(classifiers)))
    for idx, clf in enumerate(classifiers):
        class_pair = class_pairs[idx]
        prediction = clf.predict(X)
        predictions[:, idx] = np.where(prediction == 1,
                                       class_pair[0], class_pair[1])
        return mode(predictions, axis=1)[0].ravel().astype(int) 

X = load_X()
y = load_y() 
 
# Create datasets. 
training_data = [] 
class_pairs = list(combinations(set(y), 2))
for class_pair in class_pairs:
    class_mask = np.where((y == class_pair[0]) | (y == class_pair[1]))
    y_i = np.where(y[class_mask] == class_pair[0], 1, -1)
    training_data.append((X[class_mask], y_i)) 

# Train one classifier per class.
classifiers = []
for data in training_data:
    clf = svm.SVC(kernel='linear', C=1000)
    clf.fit(data[0], data[1])
    classifiers.append(clf) 

# Make predictions on two examples.
X_to_predict = np.array([[5,5],[2,5]])
print(predict_class(X_to_predict, classifiers, class_pairs)) 

[0 0]


# “the one-against-one and DAG methods are more suitable for practical use than the other methods.”  oAo in SVC oAa in linearSVC