In [2]:
from sklearn.datasets import load_wine

In [3]:
from sklearn.model_selection import train_test_split

In [4]:
from sklearn.model_selection import StratifiedKFold

In [5]:
from sklearn.model_selection import GridSearchCV

In [6]:
from sklearn.preprocessing import StandardScaler

In [7]:
from sklearn.pipeline import Pipeline

In [8]:
from sklearn.naive_bayes import GaussianNB

In [9]:
from sklearn.metrics import accuracy_score

In [10]:
from sklearn.linear_model import LogisticRegression

In [25]:
from sklearn.neighbors import KNeighborsClassifier

In [11]:
data = load_wine()

In [12]:
data.data.shape, data.target.shape

((178, 13), (178,))

In [13]:
X = data.data
y = data.target

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.10, random_state=1)

In [15]:
X_train.shape, X_test.shape

((160, 13), (18, 13))

Gaussian naive Bayes

In [20]:
pipe = Pipeline([('scaler', StandardScaler()), ('gnb', GaussianNB())])

In [21]:
param_grid = {}

In [25]:
search = GridSearchCV(pipe, param_grid, n_jobs=-1, cv=StratifiedKFold(n_splits=5))

In [26]:
search.fit(X_train, y_train)

GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=None, shuffle=False),
             error_score=nan,
             estimator=Pipeline(memory=None,
                                steps=[('scaler',
                                        StandardScaler(copy=True,
                                                       with_mean=True,
                                                       with_std=True)),
                                       ('gnb',
                                        GaussianNB(priors=None,
                                                   var_smoothing=1e-09))],
                                verbose=False),
             iid='deprecated', n_jobs=-1, param_grid={},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [27]:
print("Best parameter (CV score=%0.3f):" % search.best_score_)
print(search.best_params_)

Best parameter (CV score=0.969):
{}


In [28]:
pipe.fit(X_train,y_train)
pred = pipe.predict(X_test)

In [31]:
accuracy_score(pred, y_test)

1.0

Logistic regression

In [17]:
pipe = Pipeline([('scaler', StandardScaler()), ('lgr', LogisticRegression())])

In [18]:
param_grid = {'lgr__penalty':['l1','l2'],
             }

In [19]:
search = GridSearchCV(pipe, param_grid, n_jobs=-1, cv=StratifiedKFold(n_splits=5))

In [20]:
search.fit(X_train, y_train)

GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=None, shuffle=False),
             error_score=nan,
             estimator=Pipeline(memory=None,
                                steps=[('scaler',
                                        StandardScaler(copy=True,
                                                       with_mean=True,
                                                       with_std=True)),
                                       ('lgr',
                                        LogisticRegression(C=1.0,
                                                           class_weight=None,
                                                           dual=False,
                                                           fit_intercept=True,
                                                           intercept_scaling=1,
                                                           l1_ratio=None,
                                                           max_iter=100,
                   

In [21]:
print("Best parameter (CV score=%0.3f):" % search.best_score_)
print(search.best_params_)

Best parameter (CV score=0.988):
{'lgr__penalty': 'l2'}


In [22]:
pipe = Pipeline([('scaler', StandardScaler()), ('lgr', LogisticRegression(penalty='l2'))])

In [23]:
pipe.fit(X_train,y_train)
pred = pipe.predict(X_test)

In [24]:
accuracy_score(pred, y_test)

1.0

Logistic regression hyperparameter: <br>
Penalty: l1/l2/elastic net <br>
tolerance: the tolerance value <br>
C: inverse of lambda used for regularization. smaller C implies higher lambda and vice-versa. <br>
intercept_scaling: intercept value for the model <br>
class_weight: weights of classes. Helpful during imbalanced class problems <br>
solver: solver for logistic regression. <br>
max_iter: Number of iterations <br>
multi_class: ovr for binary and multinomial loss for multi class (without liblinear solver) <br>
warm_start: useful when training on batches <br>
l1_ratio: useful for elastic net regularization <br>

Logistic regression attributes: <br>
classes_: <br>
coef_: coefficients learnt <br>
intercept_: intercept learnt <br>

KNN classifier

In [26]:
pipe = Pipeline([('scaler', StandardScaler()), ('knn', KNeighborsClassifier(n_jobs=-1))])
param_grid = {'knn__n_neighbors':[5,6,7,8,9,10],
              'knn__weights':['uniform','distance'],
              'knn__metric':['euclidean','mahalanobis']
             }
search = GridSearchCV(pipe, param_grid, n_jobs=-1, cv=StratifiedKFold(n_splits=5))
search.fit(X_train, y_train)

GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=None, shuffle=False),
             error_score=nan,
             estimator=Pipeline(memory=None,
                                steps=[('scaler',
                                        StandardScaler(copy=True,
                                                       with_mean=True,
                                                       with_std=True)),
                                       ('knn',
                                        KNeighborsClassifier(algorithm='auto',
                                                             leaf_size=30,
                                                             metric='minkowski',
                                                             metric_params=None,
                                                             n_jobs=-1,
                                                             n_neighbors=5, p=2,
                                                             weights='u

In [27]:
search.best_params_

{'knn__metric': 'euclidean', 'knn__n_neighbors': 10, 'knn__weights': 'uniform'}

In [29]:
pipe = Pipeline([('scaler', StandardScaler()), ('knn', KNeighborsClassifier(n_jobs=-1,metric='euclidean'
                                                                           ,n_neighbors=10
                                                                            ,weights='uniform'))])
pipe.fit(X_train, y_train)
pred = pipe.predict(X_test)

In [30]:
accuracy_score(pred,y_test)

1.0

In [31]:
pred

array([2, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 0, 1, 0, 1, 1, 2, 0])