In [1]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn import metrics
import matplotlib.pyplot as plt
from sklearn.datasets import load_wine
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV

In [2]:
X, y = load_wine(return_X_y=True)

In [3]:
X.shape

(178, 13)

In [4]:
y.shape

(178,)

In [5]:
print(len(set(y)))
y

3


array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2])

In [6]:
RANDOM_STATE = 3
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=RANDOM_STATE)
print(X_train.shape, y_train.shape)
print(X_test.shape, y_train.shape)

(124, 13) (124,)
(54, 13) (124,)


In [7]:
model1 = KNeighborsClassifier(3)
model1.fit(X_train, y_train)
pred1 = model1.predict(X_test)
print('\nPrediction accuracy for the test dataset with k neighbors classifier без scaler and pca')
print('{:.2%}\n'.format(metrics.accuracy_score(y_test, pred1)))


Prediction accuracy for the test dataset with k neighbors classifier без scaler and pca
62.96%



In [8]:
model2 = make_pipeline(StandardScaler(), PCA(n_components=3), KNeighborsClassifier(3))
model2.fit(X_train, y_train)
pred2 = model2.predict(X_test)
print('\nPrediction accuracy for the test dataset with k neighbors classifier with scaler and pca')
print('{:.2%}\n'.format(metrics.accuracy_score(y_test, pred2)))


Prediction accuracy for the test dataset with k neighbors classifier with scaler and pca
94.44%



In [9]:
model3 = SVC(gamma=3, C=1)
model3.fit(X_train, y_train)
pred3 = model3.predict(X_test)
print('\nPrediction accuracy for the test dataset with svc без scaler and pca')
print('{:.2%}\n'.format(metrics.accuracy_score(y_test, pred3)))


Prediction accuracy for the test dataset with svc без scaler and pca
35.19%



In [10]:
model4 = make_pipeline(StandardScaler(), PCA(n_components=3), SVC(gamma=3, C=1))
model4.fit(X_train, y_train)
pred4 = model4.predict(X_test)
print('\nPrediction accuracy for the test dataset with svc with scaler and pca')
print('{:.2%}\n'.format(metrics.accuracy_score(y_test, pred4)))


Prediction accuracy for the test dataset with svc with scaler and pca
81.48%



In [11]:
model = SVC()
param_grid = {'kernel': ['linear', 'poly', 'rbf'], 'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000], 'gamma': [10, 1, 0.1, 0.01, 0.001, 0.0001]} 
grid_search = GridSearchCV(model, param_grid, cv=5, n_jobs = 8, verbose=1, scoring="accuracy")    
grid_search.fit(X_train, y_train)    
best_parameters = grid_search.best_estimator_.get_params()    
for para, val in list(best_parameters.items()):    
    print(para, val)    
model = SVC(kernel=best_parameters['kernel'], C=best_parameters['C'], gamma=best_parameters['gamma'])    
model.fit(X_train, y_train)
pred = model.predict(X_test)
print('\nPrediction accuracy for the test dataset with svc with grid search')
print('{:.2%}\n'.format(metrics.accuracy_score(y_test, pred)))

Fitting 5 folds for each of 126 candidates, totalling 630 fits
C 0.1
break_ties False
cache_size 200
class_weight None
coef0 0.0
decision_function_shape ovr
degree 3
gamma 10
kernel linear
max_iter -1
probability False
random_state None
shrinking True
tol 0.001
verbose False

Prediction accuracy for the test dataset with svc with grid search
88.89%



In [12]:
param_grid = {'kernel': ['linear', 'poly', 'rbf'], 'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000], 'gamma': [10, 1, 0.1, 0.01, 0.001, 0.0001]} 
model = make_pipeline(StandardScaler(), PCA(n_components=3), GridSearchCV(SVC(), param_grid, cv=5, n_jobs = 8, verbose=1) )  
model.fit(X_train, y_train)
pred = model.predict(X_test)
print('\nPrediction accuracy for the test dataset with svc with scaler, pca and grid search')
print('{:.2%}\n'.format(metrics.accuracy_score(y_test, pred)))

Fitting 5 folds for each of 126 candidates, totalling 630 fits

Prediction accuracy for the test dataset with svc with scaler, pca and grid search
94.44%



In [13]:
param_grid = {'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8]} 
model = make_pipeline(StandardScaler(), PCA(n_components=3), GridSearchCV(KNeighborsClassifier(), param_grid, cv=5, n_jobs = 8, verbose=1) )  
model.fit(X_train, y_train)
pred = model.predict(X_test)
print('\nPrediction accuracy for the test dataset with k neighbors classifier with scaler, pca and grid search')
print('{:.2%}\n'.format(metrics.accuracy_score(y_test, pred)))

Fitting 5 folds for each of 8 candidates, totalling 40 fits

Prediction accuracy for the test dataset with k neighbors classifier with scaler, pca and grid search
92.59%

