Based on the scikit-learn tutorial: https://scikit-learn.org/stable/tutorial/basic/tutorial.html

## Learning and predicting

In [1]:
from sklearn import datasets

In [2]:
# Load both bundled toy datasets up front: iris first, then digits.
iris, digits = datasets.load_iris(), datasets.load_digits()

In [14]:
from sklearn import svm

# gamma and C are hand-picked here. Tools such as grid search and
# cross-validation are the usual way to find good values for them.
clf = svm.SVC(
    C=100.0,
    gamma=0.001,
)

In [15]:
# Train on every digits sample except the last one, which is left out of the fit.
clf.fit(digits.data[:-1], digits.target[:-1])

SVC(C=100.0, gamma=0.001)

In [16]:
# Predict the label of the final digits sample, the one excluded from fitting.
clf.predict(digits.data[-1:])

array([8])

## Conventions

### Type casting

In [18]:
import numpy as np
from sklearn import kernel_approximation

# Draw a reproducible 10 x 2000 matrix from a seeded generator and keep it in
# single precision; the cell echoes the dtype to confirm it is float32.
rng = np.random.RandomState(seed=0)
X = rng.rand(10, 2000).astype(np.float32)
X.dtype

dtype('float32')

In [28]:
# Fit scikit-learn's RBFSampler on the float32 matrix X and transform it in one step.
transformer = kernel_approximation.RBFSampler()
X_new = transformer.fit_transform(X) 
# The output recorded for this cell is float64, i.e. fit_transform(X) up-cast
# the float32 input in the scikit-learn version used for this run.
# NOTE(review): newer scikit-learn releases may keep float32 here — confirm
# against the installed version.
X_new.dtype

dtype('float64')

In [24]:
from sklearn import datasets
from sklearn.svm import SVC

# Reload the iris dataset and rebind clf to a fresh SVC with default
# parameters, replacing the classifier configured earlier for the digits data.
iris = datasets.load_iris()
clf = SVC()

In [25]:
# Fit on the integer class labels stored in iris.target.
clf.fit(iris.data, iris.target)
# The targets were an integer array, so the predictions are integers too.
# .tolist() converts them to built-in Python ints so the echoed list reads
# [0, 0, 0] on every NumPy version; list() would keep NumPy scalars, which
# NumPy >= 2 prints as np.int64(0).
clf.predict(iris.data[:3]).tolist()

[0, 0, 0]

In [26]:
# Fit on string labels: indexing target_names with the integer targets maps
# every sample to its species name.
clf.fit(iris.data, iris.target_names[iris.target])
# The targets were a string array, so the predictions are strings too.
# .tolist() converts them to built-in Python str so the echoed list reads
# ['setosa', ...] on every NumPy version; list() would keep NumPy scalars,
# which NumPy >= 2 prints as np.str_('setosa').
clf.predict(iris.data[:3]).tolist()

['setosa', 'setosa', 'setosa']

### Refitting and updating parameters

In [29]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.svm import SVC

In [34]:
# Demonstrates that hyper-parameters can be changed with set_params() on an
# existing estimator and that calling fit() again replaces the earlier fit.
X, y = load_iris(return_X_y=True)

clf = SVC()
clf.set_params(kernel="linear").fit(X, y) # first fit, using a linear kernel
# Not the last expression of the cell, so this prediction is not echoed.
clf.predict(X[:5])

clf.set_params(kernel='rbf').fit(X, y) # second fit -> overwrites the existing SVC model
clf.predict(X[:5])


array([0, 0, 0, 0, 0])

### Multiclass vs. multilabel fitting

In [35]:
# With multiclass classifiers, the learning and prediction task that is
# performed depends on the format of the target data they are fit on.
from sklearn.svm import SVC
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import LabelBinarizer

In [37]:
# Five two-feature samples with integer class labels 0, 1 and 2.
X = [
    [1, 2],
    [2, 4],
    [4, 5],
    [3, 2],
    [3, 1],
]
y = [0, 0, 1, 1, 2]

# A 1-D list of labels is fit as an ordinary multiclass problem; the recorded
# output of this cell is one predicted label per sample.
classif = OneVsRestClassifier(estimator=SVC(random_state=0))
classif.fit(X, y)
classif.predict(X)

array([0, 0, 1, 1, 2])

In [38]:
# Rebind y from the 1-D label list to LabelBinarizer's 2-D binary encoding.
y = LabelBinarizer().fit_transform(y)

# Fitting on the 2-D target makes predict return a 2-D 0/1 array as well. In
# the recorded output the last two rows are all zeros, i.e. none of the
# classes was assigned to those samples.
classif.fit(X, y)
classif.predict(X)

array([[1, 0, 0],
       [1, 0, 0],
       [0, 1, 0],
       [0, 0, 0],
       [0, 0, 0]])