In [1]:
# OvO and OvR Multi-class Classification with TensorFlow

In [15]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler

In [3]:
mnist = fetch_openml('mnist_784', version=1)
X = mnist.data
y = mnist.target.astype(int)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)
first_digit = X.loc[0]

In [4]:
svm_clf = SVC()

svm_clf.fit(X_train, y_train)

In [5]:
svm_clf.predict([first_digit])



array([5])

In [6]:
some_digit_scores = svm_clf.decision_function([first_digit])
# it returns decision scores for each class



In [9]:
np.argmax(some_digit_scores) # returns the index of the class with the highest score

5

In [10]:
svm_clf.classes_ # returns list of classes

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [12]:
# let's create OvR classifier
ovr_clf = OneVsRestClassifier(SVC())
ovr_clf.fit(X_train, y_train)

In [13]:
ovr_clf.predict([first_digit]), len(ovr_clf.estimators_)



(array([5]), 10)

In [16]:
sgd_clf = SGDClassifier() # let's create a SGDClassifier for multi-class classification
sgd_clf.fit(X_train, y_train)

In [17]:
sgd_clf.decision_function([first_digit])  # returns decision scores for each class



array([[-12981.71672545, -37846.30910301,  -8130.38784484,
         -3181.87151433, -22301.98217076,   2942.75378416,
        -33793.06444801, -27214.08317595,  -7856.48877304,
        -17713.14104684]])

In [18]:
# let's use cross_val_score to evaluate the SGDClassifier
cross_val_score(
    sgd_clf, X_train, y_train, cv=3, scoring='accuracy'
) # array([0.85650902, 0.87863662, 0.88130893])

array([0.85650902, 0.87863662, 0.88130893])

In [19]:
# let's use Scaler to scale the data
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train.astype(np.float64))

cross_val_score(
    sgd_clf, X_train_scaled, y_train, cv=3, scoring='accuracy'
) # array([0.90707875, 0.90727575, 0.89966218])



array([0.90707875, 0.90727575, 0.89966218])