## Train a LinearSVC on a linearly separable dataset. Then train an SVC and a SGDClassifier on the same dataset. See if you can get them to produce roughly the same model.


In [8]:
from sklearn.datasets import load_wine

wine_data = load_wine()
X = wine_data['data'][:,[0, 9]]
y = wine_data['target']

In [9]:
from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2)

In [10]:
from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler



svc = LinearSVC(dual=False, C=1, random_state=42)
svm_clf = Pipeline([
    ('scaler', StandardScaler()),
    ('linear_svc', svc)
])

svm_clf.fit(X_train, y_train)

In [11]:
svm_predict = svm_clf.predict(X_train)

In [12]:
from sklearn.metrics import accuracy_score

accuracy_score(svm_predict, y_train)

0.8450704225352113

In [13]:
from sklearn.linear_model import SGDClassifier

sgd_clf = Pipeline([
    ('scaler', StandardScaler()),
    ('SGD', SGDClassifier(alpha=0.1, random_state=42))
])


sgd_clf.fit(X_train, y_train)

In [14]:
sgd_predict = sgd_clf.predict(X_train)

In [15]:
accuracy_score(sgd_predict, y_train)

0.8380281690140845

## Train an SVM classifier on the MNIST dataset. Since SVM classifiers are binary classifers, you will need to use one-versus-the-rest to classify all the digits. You may want to tune the hyperparameters using small validation sets to speed up the process. What accuracy can you reach?

In [200]:
from sklearn.datasets import load_digits

MNIST = load_digits()
X = MNIST['data']
y = MNIST['target']

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3)

In [201]:
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC
svc = SVC(C=100, gamma=0.01, kernel='poly')
svm_OvR_clf = Pipeline([
    ('scaler', StandardScaler()),
    ('OvR_svc', OneVsRestClassifier(svc))
])
svm_OvR_clf.fit(X_train, y_train)

In [202]:
svc_predict = svm_OvR_clf.predict(X_val) 
accuracy_score(svc_predict, y_val)

0.9888888888888889

In [207]:
from sklearn.datasets import fetch_california_housing

Housing = fetch_california_housing()
X = Housing['data']
y = Housing['target']

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2)

In [258]:
from sklearn.svm import SVR

SVR = SVR(C=100)
svr = Pipeline([
    ('scaler', StandardScaler()),
    ('SVR', SVR)
])
svr.fit(X_train, y_train)

In [263]:
svr_predict = svr.predict(X_val)

In [264]:
from sklearn.metrics import mean_squared_error

mean_squared_error(y_val, svr_predict, squared=True) * 100.000

34.57202348789175

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR

parameters = {'C':[1, 10, 100], 'kernel':('linear', 'rbf')}

svr_grid = GridSearchCV(estimator=SVR(), param_grid=parameters)
svr_grid.fit(X_train, y_train)