In [11]:
import sklearn
from sklearn import svm
from sklearn import datasets

# Digit Classifier

In [2]:
# Load both bundled toy datasets in a single statement (iris first, then digits).
iris, digits = datasets.load_iris(), datasets.load_digits()

In [5]:
# Print the digits feature matrix (NumPy abbreviates large arrays with "...").
print(digits.data)

[[  0.   0.   5. ...,   0.   0.   0.]
 [  0.   0.   0. ...,  10.   0.   0.]
 [  0.   0.   0. ...,  16.   9.   0.]
 ..., 
 [  0.   0.   1. ...,   6.   0.   0.]
 [  0.   0.   2. ...,  12.   0.   0.]
 [  0.   0.  10. ...,  12.   1.   0.]]


In [14]:
# Support vector classifier with explicitly chosen C and gamma; every other
# hyperparameter is left at its default.
clf = svm.SVC(C=100.0, gamma=0.001)

In [24]:
# Train on every sample except the last one, which is kept out of the
# training data so it can be used for a prediction afterwards.
clf.fit(X=digits.data[:-1], y=digits.target[:-1])

SVC(C=100.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [25]:
# Predict the label of the last sample; the [-1:] slice keeps the 2-D shape
# that predict() expects.
clf.predict(digits.data[-1:])

array([8])

# Saving a model in scikit-learn
Using Python's built-in persistence module, `pickle`.

In [27]:
from sklearn import svm
from sklearn import datasets

In [28]:
# Iris dataset plus an SVC left entirely at its default hyperparameters.
iris = datasets.load_iris()
clf = svm.SVC()

In [30]:
# Feature matrix and class labels of the iris dataset.
X = iris.data
y = iris.target
# Fit the classifier on the full dataset.
clf.fit(X, y)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

#### Crux

In [31]:
import pickle
# Serialize the fitted classifier into an in-memory byte string.
s = pickle.dumps(clf)

In [35]:
# Rebuild the classifier from the pickled bytes and check that the copy can
# still predict. Only unpickle data you produced yourself or otherwise trust.
clf2 = pickle.loads(s)
clf2.predict(X[:1])

array([0])

In [34]:
# Label of the first sample, for comparison with the model's prediction on X[0:1].
y[0]

0

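#### Crux: Better
For saving to disk, joblib's `dump`/`load` is usually a better choice than `pickle`: it is more efficient for objects that carry large NumPy arrays, but it can only write to disk, not to a string.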

In [36]:
# sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in
# 0.23, so prefer the standalone joblib package and fall back to the vendored
# copy only on old installations that lack it.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# Persist the fitted classifier to disk; dump() returns the list of file
# names it wrote.
joblib.dump(clf, 'filename.pkl')

['filename.pkl',
 'filename.pkl_01.npy',
 'filename.pkl_02.npy',
 'filename.pkl_03.npy',
 'filename.pkl_04.npy',
 'filename.pkl_05.npy',
 'filename.pkl_06.npy',
 'filename.pkl_07.npy',
 'filename.pkl_08.npy',
 'filename.pkl_09.npy',
 'filename.pkl_10.npy',
 'filename.pkl_11.npy']

Now let's load the saved model back from disk and use it to predict something.

In [38]:
# Restore the classifier from disk and predict the first sample again.
# joblib files are pickle-based, so only load files you trust.
clf = joblib.load('filename.pkl')
clf.predict(X[:1])

array([0])

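# Type casting
Unless otherwise specified, input data is cast to `float64`. Regression targets are cast to `float64` as well; classification targets keep their original type.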

In [47]:
from sklearn import datasets
from sklearn.svm import SVC
# Default SVC fitted on the iris features with the dataset's own target array.
clf = SVC()
iris = datasets.load_iris()
clf.fit(X=iris.data, y=iris.target)


SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [48]:
# Predictions for the first three samples, converted to a plain Python list.
list(clf.predict(iris.data[:3]))

[0, 0, 0]

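These predictions are integers, not float64 ^^^^ — the classifier was fit on the integer array `iris.target`, and classification targets keep their type (only the input features are cast to float64).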

In [41]:
# Refit with string labels: indexing target_names with the integer target
# array yields one name per sample.
clf.fit(iris.data, iris.target_names[iris.target])

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [42]:
# Predictions for the first three samples after the refit on string labels.
list(clf.predict(iris.data[:3]))

['setosa', 'setosa', 'setosa']

In [70]:
# Check the container type of target_names.
type(iris.target_names)

numpy.ndarray

#### Sidebar ----------------------------
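You can use an entire integer `np.array` as an index into another `np.array`: each element of the index array selects the entry at that position, so the result has the same shape as the index array.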

In [62]:
import numpy as np

In [73]:
# Lookup array: position i holds the label selected by integer index i.
keys = np.array(['name1', 'name3', 'name0'])

In [64]:
# Integer indices into the lookup array (each must be a valid position in it).
values = np.array([0, 1, 1, 0, 0, 2, 0, 1])

In [75]:
# Index one array with another: each integer in `values` selects the entry
# of `keys` at that position.
keys[values]

array(['name1', 'name3', 'name3', 'name1', 'name1', 'name0', 'name1',
       'name3'], 
      dtype='|S5')

#### End Sidebar ----------------------------

# Refitting and updating parameters
Hyperparameters of an estimator can be updated after construction via its `set_params()` method, e.g.
```
sklearn.pipeline.Pipeline.set_params
```
Calling `fit()` more than once will overwrite what was learned by any previous `fit()`.

In [81]:
import numpy as np
from sklearn.svm import SVC
# Seeded generator so the synthetic data is the same on every run.
rng = np.random.RandomState(0)
X = rng.rand(100, 10)            # 100 training samples, 10 features
y = rng.binomial(1, 0.5, 100)    # one random 0/1 label per training sample
X_test = rng.rand(5, 10)         # 5 samples to predict on

clf = SVC()
# Change the kernel after construction, then train.
clf.set_params(kernel='linear')
clf.fit(X, y)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

##### fit to linear

In [82]:
# Predict the test samples with the classifier as currently fitted.
clf.predict(X_test)

array([1, 0, 1, 1, 0])

##### Change to instead fit to 'rbf'

In [83]:
# Switch the kernel to 'rbf' and train again on the same data; this replaces
# whatever the previous fit had learned.
clf.set_params(kernel='rbf')
clf.fit(X, y)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [84]:
# Predict the same test samples again with the refitted classifier.
clf.predict(X_test)

array([0, 0, 0, 1, 0])