In [1]:
import numpy as np
from sklearn import datasets

wine = datasets.load_wine()

X = wine.data
y = wine.target

X.shape, y.shape

((178, 13), (178,))

In [2]:
print(wine.DESCR)

.. _wine_dataset:

Wine recognition dataset
------------------------

**Data Set Characteristics:**

    :Number of Instances: 178 (50 in each of three classes)
    :Number of Attributes: 13 numeric, predictive attributes and the class
    :Attribute Information:
 		- Alcohol
 		- Malic acid
 		- Ash
		- Alcalinity of ash  
 		- Magnesium
		- Total phenols
 		- Flavanoids
 		- Nonflavanoid phenols
 		- Proanthocyanins
		- Color intensity
 		- Hue
 		- OD280/OD315 of diluted wines
 		- Proline

    - class:
            - class_0
            - class_1
            - class_2
		
    :Summary Statistics:
    
                                   Min   Max   Mean     SD
    Alcohol:                      11.0  14.8    13.0   0.8
    Malic Acid:                   0.74  5.80    2.34  1.12
    Ash:                          1.36  3.23    2.36  0.27
    Alcalinity of Ash:            10.6  30.0    19.5   3.3
    Magnesium:                    70.0 162.0    99.7  14.3
    Total Phenols:                0

In [3]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 66)

In [4]:
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler,PolynomialFeatures
from sklearn.pipeline import Pipeline

logi = LogisticRegression()

logi.fit(X_train, y_train)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='warn', tol=0.0001, verbose=0,
                   warm_start=False)

In [5]:
logi.score(X_test, y_test)

0.9722222222222222

In [6]:
from sklearn.model_selection import GridSearchCV

pipe = Pipeline([
    ('Poly',PolynomialFeatures(degree=2)),
    ('scaler',StandardScaler()),
    ('logi',LogisticRegression())
])

param_grid = [{
    'Poly__degree': [i for i in range(6)],
    'logi__C':[i for i in np.arange(0.01,0.11,10)],
    'logi__solver': ['lbfgs', 'liblinear']
}]

grid = GridSearchCV(pipe, param_grid=param_grid)

grid.fit(X_train, y_train)





GridSearchCV(cv='warn', error_score='raise-deprecating',
             estimator=Pipeline(memory=None,
                                steps=[('Poly',
                                        PolynomialFeatures(degree=2,
                                                           include_bias=True,
                                                           interaction_only=False,
                                                           order='C')),
                                       ('scaler',
                                        StandardScaler(copy=True,
                                                       with_mean=True,
                                                       with_std=True)),
                                       ('logi',
                                        LogisticRegression(C=1.0,
                                                           class_weight=None,
                                                           dual=False,
                          

In [7]:
grid.best_estimator_

Pipeline(memory=None,
         steps=[('Poly',
                 PolynomialFeatures(degree=4, include_bias=True,
                                    interaction_only=False, order='C')),
                ('scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('logi',
                 LogisticRegression(C=0.01, class_weight=None, dual=False,
                                    fit_intercept=True, intercept_scaling=1,
                                    l1_ratio=None, max_iter=100,
                                    multi_class='warn', n_jobs=None,
                                    penalty='l2', random_state=None,
                                    solver='liblinear', tol=0.0001, verbose=0,
                                    warm_start=False))],
         verbose=False)

In [8]:
grid.score(X_test,y_test)

1.0

In [9]:
from sklearn.multiclass import OneVsRestClassifier

ovr = OneVsRestClassifier(LogisticRegression())

ovr.fit(X_train, y_train)



OneVsRestClassifier(estimator=LogisticRegression(C=1.0, class_weight=None,
                                                 dual=False, fit_intercept=True,
                                                 intercept_scaling=1,
                                                 l1_ratio=None, max_iter=100,
                                                 multi_class='warn',
                                                 n_jobs=None, penalty='l2',
                                                 random_state=None,
                                                 solver='warn', tol=0.0001,
                                                 verbose=0, warm_start=False),
                    n_jobs=None)

In [10]:
ovr.score(X_test, y_test)

0.9722222222222222

In [11]:
modle = LogisticRegression(multi_class='ovr')
modle.fit(X_train,y_train)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='ovr', n_jobs=None, penalty='l2',
                   random_state=None, solver='warn', tol=0.0001, verbose=0,
                   warm_start=False)

In [12]:
modle.score(X_test, y_test)

0.9722222222222222

In [13]:
from sklearn.metrics import confusion_matrix,precision_score,recall_score,accuracy_score

y_pre = modle.predict(X_test)

confusion_matrix(y_test, y_pre)

array([[13,  1,  0],
       [ 0, 15,  0],
       [ 0,  0,  7]], dtype=int64)

In [14]:
precision_score(y_test,y_pre,average='weighted')

0.9739583333333334

In [15]:
recall_score(y_test,y_pre,average='weighted')

0.9722222222222222

In [16]:
accuracy_score(y_test,y_pre)

0.9722222222222222

In [17]:
from sklearn.multiclass import OneVsOneClassifier

ovo = OneVsOneClassifier(LogisticRegression())
ovo.fit(X_train, y_train)



OneVsOneClassifier(estimator=LogisticRegression(C=1.0, class_weight=None,
                                                dual=False, fit_intercept=True,
                                                intercept_scaling=1,
                                                l1_ratio=None, max_iter=100,
                                                multi_class='warn', n_jobs=None,
                                                penalty='l2', random_state=None,
                                                solver='warn', tol=0.0001,
                                                verbose=0, warm_start=False),
                   n_jobs=None)

In [18]:
ovo.score(X_test, y_test)

0.9722222222222222

In [19]:
modle = LogisticRegression(multi_class='multinomial', solver='newton-cg')

modle.fit(X_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='multinomial', n_jobs=None, penalty='l2',
                   random_state=None, solver='newton-cg', tol=0.0001, verbose=0,
                   warm_start=False)

In [20]:
modle.score(X_test, y_test)

1.0

In [21]:
from sklearn.metrics import confusion_matrix,precision_score,recall_score,accuracy_score

y_pre = modle.predict(X_test)

wine.target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2])

In [22]:
confusion_matrix(y_test, y_pre)

array([[14,  0,  0],
       [ 0, 15,  0],
       [ 0,  0,  7]], dtype=int64)

In [23]:
precision_score(y_test,y_pre,average='weighted')

1.0

In [24]:
recall_score(y_test,y_pre,average='weighted')

1.0

In [25]:
accuracy_score(y_test,y_pre)

1.0