In [1]:
import numpy as np
from sklearn import datasets
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

# Binary task: separate Iris class 2 from the other classes using only
# feature columns 2 and 3 of the data matrix.
iris = datasets.load_iris()
X = iris['data'][:, [2, 3]]
y = (iris['target'] == 2).astype(np.float64)

# Standardise the two features, then fit a linear SVM with hinge loss.
# random_state is pinned so the solver's pseudo-random choices are the same
# on every Restart & Run All.
clf = Pipeline([
    ('scaler', StandardScaler()),
    ('linear_svc', LinearSVC(C=1, loss='hinge', random_state=42)),
])

clf.fit(X, y)


In [2]:
# Run the fitted pipeline on one hand-written sample (two feature values).
sample = [[5.5, 1.7]]
print(clf.predict(sample))

[1.]


In [3]:
from sklearn.datasets import make_moons
from sklearn.preprocessing import PolynomialFeatures

# Two-moons toy data. The seed is fixed so the sampled points (and therefore
# the fitted model) are identical on every run.
X, y = make_moons(n_samples=100, noise=0.15, random_state=42)

# Degree-3 polynomial expansion -> standardisation -> linear SVM (hinge loss).
# The SVM is seeded as well so the fit itself is repeatable.
clf = Pipeline([
    ('poly_features', PolynomialFeatures(degree=3)),
    ('scaler', StandardScaler()),
    ('svm', LinearSVC(C=10, loss="hinge", random_state=42)),
])

clf.fit(X, y)



In [4]:
from sklearn.model_selection import train_test_split

# Rebuild the binary Iris task: feature columns 2 and 3, target is "class == 2".
iris = datasets.load_iris()
X = iris['data'][:, [2, 3]]
y = (iris['target'] == 2).astype(np.float64)

# Hold out 20% of the rows for evaluation.
# - random_state makes the partition repeatable, so the confusion matrices
#   computed from it can be reproduced.
# - stratify=y keeps the positive/negative ratio the same in both partitions,
#   which matters for a test set this small.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Scaler + linear SVM (hinge loss), seeded for a repeatable fit.
linear_clf = Pipeline([
    ('scaler', StandardScaler()),
    ('linear', LinearSVC(C=1, loss='hinge', random_state=42)),
])

linear_clf.fit(X_train, y_train)


In [5]:
from sklearn.svm import SVC
# Same preprocessing as the linear pipeline, but with an SVC estimator
# (C=1, every other setting left at its default).
svc_steps = [
    ('scaler', StandardScaler()),
    ('svc', SVC(C=1)),
]
svc_clf = Pipeline(svc_steps)

svc_clf.fit(X_train, y_train)

In [6]:
from sklearn.linear_model import SGDClassifier
# Scaler + SGD-trained classifier with hinge loss.
# random_state is pinned so the fitted model (and the confusion matrix
# derived from it) is the same on every run.
sgd_clf = Pipeline([
    ('scaler', StandardScaler()),
    ('sgd', SGDClassifier(loss='hinge', random_state=42)),
])

sgd_clf.fit(X_train, y_train)

In [7]:
from sklearn.metrics import confusion_matrix

# Print the held-out confusion matrix of each fitted pipeline, in the order
# linear SVM, SVC, SGD.
for fitted_clf in (linear_clf, svc_clf, sgd_clf):
    print(confusion_matrix(y_test, fitted_clf.predict(X_test)))

[[20  0]
 [ 3  7]]
[[20  0]
 [ 3  7]]
[[20  0]
 [ 2  8]]


In [8]:
from sklearn.datasets import fetch_openml
# Fetch MNIST (784 pixel columns per row) from OpenML.
# as_frame=True is requested explicitly: the following cell calls `.values`
# on both 'data' and 'target', which only works when they are pandas objects,
# so this must not depend on a version-specific default.
mnist = fetch_openml('mnist_784', version=1, as_frame=True)
print(mnist.keys())

dict_keys(['data', 'target', 'frame', 'categories', 'feature_names', 'target_names', 'DESCR', 'details', 'url'])


In [10]:
# Work on the first 10,000 rows only, to keep training time manageable.
X = mnist['data'].values[:10000]
y = mnist['target'].values[:10000]
print(X.shape, y.shape)

# 80/20 train/test partition; seeded so the scores and confusion matrix
# computed from it can be reproduced.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

(10000, 784) (10000,)


In [15]:
from sklearn.model_selection import cross_val_score

# Refit the scaler + linear SVM pipeline on the current training split
# (later cells predict with it), then show the 3-fold cross-validation scores.
linear_clf.fit(X_train, y_train)

cv_scores = cross_val_score(linear_clf, X_train, y_train, cv=3)
cv_scores



array([0.84289464, 0.82602175, 0.83420855])

In [16]:
# Confusion matrix of the fitted linear pipeline on the held-out split.
test_pred = linear_clf.predict(X_test)
confusion_matrix(y_test, test_pred)

array([[193,   1,   1,   0,   1,   3,   1,   1,   2,   1],
       [  0, 233,   3,   1,   0,   3,   1,   0,   3,   1],
       [  4,   6, 141,   9,   5,   1,   4,   3,   5,   1],
       [  1,   0,   6, 170,   0,  13,   1,   5,   7,   4],
       [  2,   1,   7,   1, 150,   0,   4,   4,   4,   9],
       [  0,   0,   2,  14,   7, 153,   2,   1,   3,   2],
       [  4,   2,   4,   1,   1,   5, 186,   0,   2,   1],
       [  3,   2,   2,   1,   4,   1,   1, 185,   1,   7],
       [  8,   7,   5,   6,   1,  10,   2,   4, 125,   8],
       [  4,   5,   1,   4,   9,   1,   2,  17,   4, 163]])

In [17]:
# Load the California housing dataset and list the fields it exposes.
housing = datasets.fetch_california_housing()
available_fields = housing.keys()
print(available_fields)

dict_keys(['data', 'target', 'frame', 'target_names', 'feature_names', 'DESCR'])


In [24]:
# Feature matrix and regression target from the housing bunch.
X, y = housing['data'], housing['target']

# 80/20 train/test partition; seeded so the MSE figures reported from it
# can be reproduced on a fresh run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print(X.shape, y.shape)

(20640, 8) (20640,)


In [39]:
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error

# Standardise the features, then fit an RBF-kernel support vector regressor.
svr_steps = [
    ('scaler', StandardScaler()),
    ('svr', SVR(kernel='rbf')),
]
svr_reg = Pipeline(svr_steps)
svr_reg.fit(X_train, y_train)

# Mean squared error on the data the model was trained on.
train_pred = svr_reg.predict(X_train)
mean_squared_error(y_train, train_pred)

0.3268795882588395

In [40]:
# 3-fold cross-validation of the SVR pipeline, scored as negated MSE.
neg_mse_per_fold = cross_val_score(
    svr_reg,
    X_train,
    y_train,
    scoring='neg_mean_squared_error',
    cv=3,
)
neg_mse_per_fold

array([-0.34566392, -0.34761667, -0.34224931])

In [41]:
# Mean squared error of the fitted SVR pipeline on the held-out split.
svr_test_pred = svr_reg.predict(X_test)
mean_squared_error(y_test, svr_test_pred)

0.33321502146830273

In [42]:
from sklearn.linear_model import LinearRegression

# Plain least-squares baseline, fitted on the raw (unscaled) training features
# and scored by MSE on the held-out split.
reg = LinearRegression().fit(X_train, y_train)
baseline_pred = reg.predict(X_test)
mean_squared_error(y_test, baseline_pred)

0.5125420894516889