In [62]:
import pandas as pd
from sklearn.datasets import load_wine

In [63]:
# Load the wine data as pandas objects and build a single frame that holds
# the feature columns plus the class label in a trailing 'Target' column.
dataset = load_wine(as_frame=True)
# `assign` returns a new frame, so the Bunch's own `data` frame is never
# modified (re-wrapping it in pd.DataFrame and inserting a column in place
# can, depending on the pandas version, alias the original).
df = dataset.data.assign(Target=dataset.target)
df.head()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,Target
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0,0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0,0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0,0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0,0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0,0


In [64]:
# Number of rows per class label in the 'Target' column.
df['Target'].value_counts()

Target
1    71
0    59
2    48
Name: count, dtype: int64

In [65]:
# Dimensions of the frame as (rows, columns).
df.shape

(178, 14)

In [12]:
# Keep only the rows labelled 0 or 1, turning the task into a two-class
# problem, then show which labels remain.
# NOTE(review): this rebinds `df`, so on a top-to-bottom run every later
# cell works on two classes only -- confirm that is intended for the
# kernel-SVM section further down.
df = df.loc[df['Target'].isin([0, 1])]
df['Target'].value_counts()

Target
1    71
0    59
Name: count, dtype: int64

In [66]:
from sklearn.model_selection import train_test_split

# Features are every column except the label. The label is kept as a 1-D
# Series (not a one-column frame) so estimators and scorers receive the
# shape they expect instead of a column vector.
X = df.drop(columns='Target')
y = df['Target']

# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [67]:
from sklearn.svm import LinearSVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

In [24]:
# Baseline model: standardise the features, then fit a linear SVM on the
# training split. The label is flattened to 1-D before fitting.
linear_svm = LinearSVC(max_iter=1000, dual=True, random_state=42)
pipeline = make_pipeline(StandardScaler(), linear_svm)
pipeline.fit(X_train, y_train.to_numpy().ravel())

In [25]:
# Predict labels for the held-out test features with the fitted pipeline.
y_pred = pipeline.predict(X_test)

In [26]:
from sklearn.metrics import f1_score

# F1 of the test-set predictions, computed with f1_score's default settings.
f1_score(y_true=y_test, y_pred=y_pred)

1.0

In [28]:
from sklearn.model_selection import cross_val_score

# 10-fold cross-validated F1 of the pipeline over the full data set, using
# all available cores. The label is flattened to 1-D so the estimator is not
# handed a column vector; this matches how `y` is passed to the other
# fit / cross-validation calls in this notebook.
scores = cross_val_score(pipeline, X, y.values.ravel(), scoring='f1', cv=10, n_jobs=-1)

In [32]:
# Average of the per-fold scores from the cross-validation run.
scores.mean()

0.9779487179487181

In [75]:
# Kernelized SVM
from sklearn.svm import SVC

# Scale the features, then classify with an SVC left at its default kernel;
# only the decision-function layout is set explicitly.
kernel_svm = SVC(decision_function_shape='ovo')
pipeline = make_pipeline(StandardScaler(), kernel_svm)

In [76]:
# Fit the scaler + SVC pipeline on the training split (label flattened to 1-D).
pipeline.fit(X_train, y_train.to_numpy().ravel())

In [77]:
# Predicted class labels for the held-out test features (shown as the cell output).
pipeline.predict(X_test)

array([0, 0, 2, 0, 1, 0, 1, 2, 1, 2, 0, 2, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1,
       1, 2, 2, 2, 1, 1, 1, 0, 0, 1, 2, 0, 0, 0])

In [78]:
# 10-fold cross-validated accuracy of the pipeline over the full data set.
# The label is flattened to 1-D so the estimator is not handed a column
# vector, consistent with the other fit / cross-validation calls.
scores = cross_val_score(pipeline, X, y.values.ravel(), cv=10, scoring='accuracy', n_jobs=-1)

In [79]:
# Average of the per-fold accuracy scores.
scores.mean()

0.9777777777777779

In [80]:
from sklearn.model_selection import GridSearchCV
# Search space for the grid search. Keys use the '<step>__<param>' form so
# they reach the SVC step of the pipeline: regularisation strength C, kernel
# coefficient gamma, and the decision-function layout.
model_params = dict(
    svc__C=[0.1, 5, 100],
    svc__gamma=[0.1, 0.5, 5],
    svc__decision_function_shape=['ovr', 'ovo'],
)

In [81]:
# Exhaustive search over `model_params` for the pipeline, scored with
# 5-fold cross-validation.
cv = GridSearchCV(estimator=pipeline, param_grid=model_params, cv=5)

In [82]:
# Tune on the training split only. Fitting the search on the full X would
# let the test rows influence both the chosen hyperparameters and the refit
# best estimator, so any later score on X_test would be optimistically
# biased (data leakage).
cv.fit(X_train, y_train.values.ravel())

In [83]:
# Hyperparameter combination selected by the grid search.
cv.best_params_

{'svc__C': 5, 'svc__decision_function_shape': 'ovr', 'svc__gamma': 0.1}

In [84]:
# Predict the test split with the tuned model; the fitted search object
# forwards predict() to its refit best estimator.
y_pred = cv.predict(X_test)

In [87]:
# Import the submodule explicitly: a bare `import sklearn` does not by itself
# guarantee that `sklearn.metrics` is loaded, so the attribute access below
# would otherwise depend on an earlier cell having imported it.
import sklearn.metrics

# Accuracy of the tuned model's predictions on the held-out test split.
sklearn.metrics.accuracy_score(y_test, y_pred)

1.0

In [90]:
# 25-fold cross-validation of the tuned estimator over the full data set.
# No `scoring` is given, so the estimator's own default scorer is used.
scores = cross_val_score(
    estimator=cv.best_estimator_,
    X=X,
    y=y.values.ravel(),
    cv=25,
)

In [91]:
# Average of the per-fold scores for the tuned estimator.
scores.mean()

0.9785714285714286