Machine Learning - the problem setting

In [1]:
from sklearn import datasets

In [2]:
# Load the iris dataset through sklearn's datasets module.
iris = datasets.load_iris()

In [3]:
# Load the digits dataset; later cells read its .data, .target and .images attributes.
digits = datasets.load_digits()

In [4]:
# Print the feature matrix; NumPy abbreviates the large array with ellipses.
print(digits.data)

[[  0.   0.   5. ...,   0.   0.   0.]
 [  0.   0.   0. ...,  10.   0.   0.]
 [  0.   0.   0. ...,  16.   9.   0.]
 ..., 
 [  0.   0.   1. ...,   6.   0.   0.]
 [  0.   0.   2. ...,  12.   0.   0.]
 [  0.   0.  10. ...,  12.   1.   0.]]


In [5]:
# Label array for the digits samples (used as the training target below).
digits.target

array([0, 1, 2, ..., 8, 9, 8])

In [7]:
# First sample in its image form: an 8x8 array, as the output below shows.
digits.images[0]

array([[  0.,   0.,   5.,  13.,   9.,   1.,   0.,   0.],
       [  0.,   0.,  13.,  15.,  10.,  15.,   5.,   0.],
       [  0.,   3.,  15.,   2.,   0.,  11.,   8.,   0.],
       [  0.,   4.,  12.,   0.,   0.,   8.,   8.,   0.],
       [  0.,   5.,   8.,   0.,   0.,   9.,   8.,   0.],
       [  0.,   4.,  11.,   0.,   1.,  12.,   7.,   0.],
       [  0.,   2.,  14.,   5.,  10.,  12.,   0.,   0.],
       [  0.,   0.,   6.,  13.,  10.,   0.,   0.,   0.]])

Loading an Example Dataset

In [9]:
from sklearn import svm

In [11]:
# Support vector classifier with hand-picked hyperparameters
# (regularization strength C and kernel coefficient gamma).
clf = svm.SVC(C=100.0, gamma=0.001)

In [12]:
# Train on every sample except the last one, which is held back for the
# prediction in the next cell.
clf.fit(digits.data[:-1], digits.target[:-1])

SVC(C=100.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [13]:
# Predict the held-out last sample; the [-1:] slice keeps the input two-dimensional.
clf.predict(digits.data[-1:])

array([8])

Learning and Predicting

In [17]:
from sklearn import svm 
from sklearn import datasets

In [18]:
# New SVC with default hyperparameters; rebinding `clf` replaces the digits
# classifier trained earlier in the notebook.
clf = svm.SVC()

In [19]:
# Load the iris dataset (rebinds the `iris` name set at the top of the notebook).
iris = datasets.load_iris()

In [20]:
# Feature matrix and target vector used by the persistence examples below.
X = iris.data
y = iris.target

In [21]:
# Fit the default SVC on the full iris data; the estimator repr is echoed below.
clf.fit(X, y)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

Model Persistence

In [22]:
import pickle

In [23]:
# Serialize the fitted classifier in memory (no file is written).
s = pickle.dumps(clf)

In [24]:
# Rebuild an independent copy of the classifier from the serialized form.
# Only unpickle data you trust: pickle.loads can execute arbitrary code.
clf2 = pickle.loads(s)

In [25]:
# Predict with the unpickled copy on a one-row slice of X.
clf2.predict(X[0:1])

array([0])

In [26]:
# True label of the first sample, for comparison with the prediction above.
y[0]

0

In [29]:
# scikit-learn's vendored copy (sklearn.externals.joblib) was deprecated in
# 0.21 and removed in 0.23, so prefer the standalone joblib package and fall
# back to the vendored module only on old installations that lack it.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

In [30]:
# Persist the fitted classifier to 'filename.pkl' (relative path, so it lands in
# the current working directory); the call returns the list of files written.
joblib.dump(clf, 'filename.pkl')

['filename.pkl',
 'filename.pkl_01.npy',
 'filename.pkl_02.npy',
 'filename.pkl_03.npy',
 'filename.pkl_04.npy',
 'filename.pkl_05.npy',
 'filename.pkl_06.npy',
 'filename.pkl_07.npy',
 'filename.pkl_08.npy',
 'filename.pkl_09.npy',
 'filename.pkl_10.npy',
 'filename.pkl_11.npy']

In [31]:
# Reload the classifier from disk, rebinding `clf` to the restored object.
# joblib files are pickle-based: only load files from a trusted source.
clf = joblib.load('filename.pkl')

In [32]:
# Echo the reloaded estimator to inspect its parameters.
clf

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

Conventions

In [35]:
import numpy as np
from sklearn import random_projection

# Seeded generator so the random input is identical on every run.
rng = np.random.RandomState(0)
X = rng.rand(10, 2000)

# Downcast the input to float32 so the dtype of the transformer's output can
# be compared against it.
X = np.array(X, dtype='float32')

# A bare `X.dtype` in the middle of a cell is evaluated and thrown away (only
# the last expression of a cell is echoed), so check the input dtype explicitly.
assert X.dtype == np.float32

transformer = random_projection.GaussianRandomProjection()
X_new = transformer.fit_transform(X)

# Last expression of the cell: dtype of the projected data.
# NOTE(review): the recorded output is float64; newer scikit-learn releases may
# keep float32 here -- confirm against the installed version.
X_new.dtype

dtype('float64')

In [38]:
from sklearn import datasets
from sklearn.svm import SVC
iris = datasets.load_iris()
clf = SVC()
# First fit: train against iris.target.
clf.fit(iris.data, iris.target)
# Bare expression in the middle of a cell: the prediction is computed but not
# displayed, because only the cell's last expression is echoed.
list(clf.predict(iris.data[:3]))
# Second fit: targets are looked up in iris.target_names, so the predictions
# below come back as strings (see the recorded output).
clf.fit(iris.data, iris.target_names[iris.target])
list(clf.predict(iris.data[:3]))

['setosa', 'setosa', 'setosa']

In [45]:
import numpy as np
from sklearn.svm import SVC

# Fixed seed; the three draws below must stay in this order to reproduce the
# same data.
rng = np.random.RandomState(0)
X = rng.rand(100, 10)
y = rng.binomial(n=1, p=0.5, size=100)
X_test = rng.rand(5, 10)

clf = SVC()

# Train and predict with a linear kernel.
clf.set_params(kernel='linear')
clf.fit(X, y)
print(clf.predict(X_test))

# Switch the same estimator to an RBF kernel and refit; the second fit
# replaces what was learned by the first.
clf.set_params(kernel='rbf')
clf.fit(X, y)
print(clf.predict(X_test))

[1 0 1 1 0]
[0 0 0 1 0]


In [46]:
from sklearn.svm import SVC
from sklearn.multiclass  import OneVsRestClassifier
from sklearn.preprocessing import LabelBinarizer


In [47]:
# Five two-feature samples used by the classification cells that follow.
X = [
    [1, 2],
    [2, 4],
    [4, 5],
    [3, 2],
    [3, 1],
]

In [48]:
# Echo X to confirm its contents.
X

[[1, 2], [2, 4], [4, 5], [3, 2], [3, 1]]

In [49]:
# One integer class label per sample in X.
y = [
    0, 0,  # class 0
    1, 1,  # class 1
    2,     # class 2
]

In [50]:
# One-vs-rest wrapper around an SVC base estimator, fitted on the integer
# class labels in y.
classif = OneVsRestClassifier(SVC(random_state=0))
classif.fit(X, y)
# Last expression of the cell: predictions on the training samples.
classif.predict(X)

array([0, 0, 1, 1, 2])

In [52]:
from sklearn.preprocessing import MultiLabelBinarizer

# Each sample carries a set of labels; binarize those sets into an indicator
# matrix before fitting. `y` is rebound from the earlier class list to that matrix.
y = MultiLabelBinarizer().fit_transform(
    [[0, 1], [0, 2], [1, 3], [0, 2, 3], [2, 4]]
)
classif.fit(X, y)
# Last expression of the cell: one row of predicted label indicators per sample.
classif.predict(X)

array([[1, 1, 0, 0, 0],
       [1, 0, 1, 0, 0],
       [0, 1, 0, 1, 0],
       [1, 0, 1, 0, 0],
       [1, 0, 1, 0, 0]])