In [35]:
from sklearn.datasets import load_wine
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import (
    train_test_split,
    cross_val_score,
    RandomizedSearchCV,
)
from scipy.stats import uniform, loguniform
from sklearn.svm import LinearSVC, SVC

In [36]:
# Load the wine dataset; as_frame=True makes the features and target pandas objects.
wine = load_wine(as_frame=True)

In [37]:
# Preview the first rows of the feature table.
wine.data.head()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0


In [38]:
# Split the features and labels into training and held-out test sets.
# No test_size is passed, so scikit-learn's default split fraction applies;
# random_state pins the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    wine.data,
    wine.target,
    random_state=69,
)
X_train.head()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
147,12.87,4.61,2.48,21.5,86.0,1.7,0.65,0.47,0.86,7.65,0.54,1.86,625.0
30,13.73,1.5,2.7,22.5,101.0,3.0,3.25,0.29,2.38,5.7,1.19,2.71,1285.0
132,12.81,2.31,2.4,24.0,98.0,1.15,1.09,0.27,0.83,5.7,0.66,1.36,560.0
79,12.7,3.87,2.4,23.0,101.0,2.83,2.55,0.43,1.95,2.57,1.19,3.13,463.0
43,13.24,3.98,2.29,17.5,103.0,2.64,2.63,0.32,1.66,4.36,0.82,3.0,680.0


In [39]:
# Preview the training labels; their index should line up with the rows of X_train.
y_train.head()

147    2
30     0
132    2
79     1
43     0
Name: target, dtype: int64

In [40]:
# Baseline: a linear SVM fitted directly on the raw (unscaled) features.
# NOTE: the author reports that this fit failed to converge, even with
# max_iter raised to 1,000,000, and attributes that to the data not being scaled.
lin_clf = LinearSVC(
    random_state=69,
    max_iter=1_000_000,
)
lin_clf.fit(X_train, y_train)



In [41]:
# Standardize the features before the linear SVM; wrapping both steps in a
# pipeline means the scaler is fitted only on the data passed to fit().
lin_clf = make_pipeline(
    StandardScaler(),
    LinearSVC(random_state=69),
)
lin_clf.fit(X_train, y_train)

In [42]:
# Evaluate the scaled linear SVM with 9-fold cross-validation on the training
# set and report the mean of the per-fold scores.
cross_val_score(
    estimator=lin_clf,
    X=X_train,
    y=y_train,
    cv=9,
).mean()

0.9624338624338624

In [43]:
# Kernelized SVM: SVC with its default kernel (RBF), again behind a StandardScaler.
svm_clf = make_pipeline(
    StandardScaler(),
    SVC(random_state=69),
)
# Mean score over 9 cross-validation folds of the training set.
cross_val_score(
    estimator=svm_clf,
    X=X_train,
    y=y_train,
    cv=9,
).mean()

0.9629629629629629

In [44]:
# Tuning the hyperparameters: sampling distributions for the SVC step.
# The "svc__" prefix addresses the pipeline step that make_pipeline names "svc".
param_distributions = {
    # gamma is drawn log-uniformly between 0.001 and 0.1.
    "svc__gamma": loguniform(0.001, 0.1),
    # scipy's uniform(loc, scale) covers [loc, loc + scale], so C is drawn from [1, 11].
    "svc__C": uniform(1, 10),
}

# Randomized search over the SVC hyperparameters: 100 sampled candidates,
# each scored with 9-fold cross-validation; random_state makes the sampling repeatable.
random_search_cv = RandomizedSearchCV(
    estimator=svm_clf,
    param_distributions=param_distributions,
    n_iter=100,
    cv=9,
    random_state=69,
)
random_search_cv.fit(X_train, y_train)
# Show the best pipeline found by the search.
random_search_cv.best_estimator_

In [45]:
# Mean cross-validated score of the best candidate found by the randomized search.
random_search_cv.best_score_

0.9851851851851853

In [47]:
# Final check: score the tuned search object on the held-out test set.
print(
    "Test Score:",
    random_search_cv.score(X=X_test, y=y_test),
)

Test Score: 0.9777777777777777
