In [5]:
# Bibliotecas de uso geral
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV

In [27]:
# Wine dataset, read from the UCI archive URL; header=None because column
# names are assigned explicitly below.
df_wine = pd.read_csv('https://archive.ics.uci.edu/'
                      'ml/machine-learning-databases/'
                      'wine/wine.data', header=None)
df_wine.columns = ['Class label', 'Alcohol',
                   'Malic acid', 'Ash',
                   'Alcalinity of ash', 'Magnesium',
                   'Total phenols', 'Flavanoids',
                   'Nonflavanoid phenols',
                   'Proanthocyanins',
                   'Color intensity', 'Hue',
                   'OD280/OD315 of diluted wines',
                   'Proline']

# Split into features (every column but the first) and target (first column).
X, y = df_wine.iloc[:, 1:].values, df_wine.iloc[:, 0].values

# Hold out 15% of the samples for testing; the split is stratified on the
# class label and seeded so it is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.15, random_state=10, stratify=y)

# Z-score normalisation: statistics are estimated on the training split only
# and then reused, unchanged, on the test split.
# NOTE(review): the scaler is fit on the whole training split *before* the
# cross-validation below, so every validation fold has already contributed to
# the scaling statistics. Wrapping scaler + classifier in a Pipeline would
# remove that leak, at the cost of renaming the grid keys.
zscore = StandardScaler()
Xz_train = zscore.fit_transform(X_train)
Xz_test = zscore.transform(X_test)

# Hyperparameter search space: 3 solvers x 4 values of C x 3 iteration caps.
grid = [{'solver': ['newton-cg', 'lbfgs', 'liblinear'],
         'C': [1, 10, 100, 1000],
         'max_iter': [100, 1000, 10000]}]

# Search for the best hyperparameters with k-fold cross-validation.
# NOTE(review): `multi_class` appears to be deprecated in recent scikit-learn
# releases -- confirm against the installed version before upgrading.
k = 3
gs = GridSearchCV(LogisticRegression(multi_class='ovr'), grid, cv=k)
gs.fit(Xz_train, y_train)

# Best cross-validated score (as a percentage) and the configuration behind it.
print('Melhor score de validação: %.3f' % (gs.best_score_*100))
print('Melhores hiperparâmetros:', gs.best_params_)

# Final evaluation of the selected model.
# With the default refit=True, GridSearchCV has already refit the best
# configuration on the full training data, so `best_estimator_` is used as-is
# (no second fit) and scored directly; `.score` returns a single accuracy
# value, so no averaging is needed.
clf = gs.best_estimator_
print('Acurácia de treinamento: %.3f' % (clf.score(Xz_train, y_train)*100))
print('Acurácia de teste: %.3f' % (clf.score(Xz_test, y_test)*100))

Melhor score de validação: 98.675
Melhores hiperparâmetros: {'C': 1, 'max_iter': 100, 'solver': 'liblinear'}
Acurácia de treinamento: 100.000
Acurácia de teste: 100.000
