# Multiclass Classification

### 1. OVA (One vs All)
### 2. OVO (One vs One)

In [4]:
import numpy as np

In [5]:
from sklearn.datasets import load_iris

In [6]:
iris = load_iris()

In [7]:
print(iris.DESCR)

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

In [8]:
iris.feature_names

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [9]:
iris.target_names

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

In [10]:
from sklearn.svm import LinearSVC

In [11]:
from sklearn.pipeline import Pipeline

In [12]:
from sklearn.base import BaseEstimator, TransformerMixin

In [13]:
from sklearn.preprocessing import StandardScaler

In [14]:
class Shuffler(BaseEstimator, TransformerMixin):
    def __init__self(self):
        pass
    def fit(self,X,y=None):
         return self
    def transform(self,X,y=None):
         return X[np.random.permutation(len(X))]          

In [15]:
svc_pipe = Pipeline([
    ('Shuffler', Shuffler()),
    ('scaler', StandardScaler()),
    ('svc', LinearSVC(loss='hinge', C=1.0))
])

In [16]:
svc_pipe.fit(iris.data, iris.target)

Pipeline(memory=None,
     steps=[('Shuffler', Shuffler()), ('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('svc', LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='hinge', max_iter=1000, multi_class='ovr',
     penalty='l2', random_state=None, tol=0.0001, verbose=0))])

In [17]:
[iris.target_names[i] for i in svc_pipe.predict(iris.data[:50])]

['virginica',
 'versicolor',
 'versicolor',
 'versicolor',
 'versicolor',
 'versicolor',
 'setosa',
 'virginica',
 'versicolor',
 'virginica',
 'versicolor',
 'setosa',
 'virginica',
 'versicolor',
 'virginica',
 'virginica',
 'versicolor',
 'virginica',
 'versicolor',
 'virginica',
 'virginica',
 'setosa',
 'virginica',
 'virginica',
 'versicolor',
 'virginica',
 'virginica',
 'versicolor',
 'setosa',
 'virginica',
 'versicolor',
 'versicolor',
 'virginica',
 'versicolor',
 'versicolor',
 'virginica',
 'virginica',
 'versicolor',
 'versicolor',
 'versicolor',
 'setosa',
 'virginica',
 'virginica',
 'versicolor',
 'setosa',
 'versicolor',
 'virginica',
 'versicolor',
 'virginica',
 'virginica']

In [18]:
from sklearn.multiclass import OneVsRestClassifier

In [21]:
svc_pipe_1 = Pipeline([
    ('Shuffler', Shuffler()),
    ('scaler', StandardScaler()),
    ('svc', OneVsRestClassifier(LinearSVC(loss='hinge', C=1.0)))
])

In [22]:
svc_pipe_1.fit(iris.data, iris.target)

Pipeline(memory=None,
     steps=[('Shuffler', Shuffler()), ('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('svc', OneVsRestClassifier(estimator=LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='hinge', max_iter=1000, multi_class='ovr',
     penalty='l2', random_state=None, tol=0.0001, verbose=0),
          n_jobs=None))])