In [None]:
# Import the libraries
import warnings
warnings.filterwarnings("ignore")  # installed before the sklearn import below, so it is active from this point on

# Load the dataset
from sklearn.datasets import load_wine
wine = load_wine()

In [None]:
# Example of reading the data out of the loaded dataset:
#   X -> features of every sample
#   y -> class of every sample
X, y = wine.data[:, :], wine.target

In [None]:
# Split the data so that part of it is reserved for training
from sklearn.model_selection import train_test_split

# This helper makes the split easier, but it is not mandatory:
# the data could just as well be divided by hand.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [None]:
# Load and train the classifiers
# Random Forest

from sklearn.ensemble import RandomForestClassifier

# Seed the forest (same seed as the train/test split) so that the metrics,
# cross-validation scores and grid-search results computed from `rfc`
# are reproducible across kernel restarts.
rfc = RandomForestClassifier(random_state=42)
rfc.fit(X_train, y_train)
y_pred = rfc.predict(X_test)

In [None]:
# Random Forest metrics
# NOTE: `y_pred` must still hold the Random Forest predictions here;
# the KNN cell further down reassigns the same name.
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Every score is rounded to six decimal places.
rfc_acc = round(accuracy_score(y_test, y_pred), 6)
rfc_precision = round(
    precision_score(y_test, y_pred, average='weighted'), 6
)
rfc_recall = round(
    recall_score(y_test, y_pred, average='weighted'), 6
)

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Build the 3-nearest-neighbours classifier and train it in one expression
# (`fit` returns the estimator itself), then predict the held-out samples.
# This overwrites `y_pred` with the KNN predictions.
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)
y_pred = knn.predict(X_test)

In [None]:
# KNN metrics (expects `y_pred` to hold the KNN predictions).
# Every score is rounded to six decimal places.

knn_acc = round(accuracy_score(y_test, y_pred), 6)
knn_precision = round(
    precision_score(y_test, y_pred, average='weighted'), 6
)
knn_recall = round(
    recall_score(y_test, y_pred, average='weighted'), 6
)

In [None]:
# Side-by-side comparison of the two classifiers (KNN first, Random Forest second)
print('KNN vs Random Forest\n')
print('Classes: {0}\n'.format(wine.target_names))

# One output line per metric: (format template, KNN value, Random Forest value)
for template, knn_value, rfc_value in (
    ('Acurácia: {0} vs {1}', knn_acc, rfc_acc),
    ('Recall: {0} vs {1}', knn_recall, rfc_recall),
    ('Precisão: {0} vs {1}', knn_precision, rfc_precision),
):
    print(template.format(knn_value, rfc_value))

In [None]:
# Cross-validation on the full dataset, Random Forest first and KNN second
from sklearn.model_selection import cross_val_score

cv_rfc, cv_knn = (cross_val_score(model, X, y) for model in (rfc, knn))

# The report lists the KNN scores before the Random Forest scores.
print('\nValidação Cruzada: {0} vs {1}'.format(cv_knn, cv_rfc))

In [None]:
# Mean cross-validation score of the Random Forest.
# Divide by the actual number of scores instead of a hard-coded 5: no `cv`
# argument is passed where `cv_rfc` is computed, so the number of folds is
# whatever the installed library defaults to.
sum_cv_rfc = sum(cv_rfc)
print('\nResultado Random Forest: {0}'.format(sum_cv_rfc / len(cv_rfc)))

In [None]:
# Mean cross-validation score of the KNN classifier.
# Divide by the actual number of scores instead of a hard-coded 5: no `cv`
# argument is passed where `cv_knn` is computed, so the number of folds is
# whatever the installed library defaults to.
sum_cv_knn = sum(cv_knn)
print('\nResultado KNN: {0}'.format(sum_cv_knn / len(cv_knn)))

In [None]:
# Otimização de Hiperparâmetros
from sklearn.model_selection import GridSearchCV

# Random Forest: grid search over min_samples_split.
# A grid entry lists the candidate values to try, so the tuple (2, 6) tested
# only 2 and 6. Enumerate every value from 2 to 6 (inclusive) instead.
parameters = {'min_samples_split': list(range(2, 7))}
rfc_hps = GridSearchCV(rfc, parameters)
rfc_hps.fit(X, y)
print('Melhor valor para min_samples_split: {0}'.format(rfc_hps.best_params_['min_samples_split']))

# KNN: grid search over n_neighbors.
# A grid entry lists the candidate values to try, so the tuple (1, 20) tested
# only 1 and 20. Enumerate every value from 1 to 20 (inclusive) instead.
parameters = {'n_neighbors': list(range(1, 21))}
knn_hps = GridSearchCV(knn, parameters)
knn_hps.fit(X, y)
print('Melhor valor para n_neighbors: {0}'.format(knn_hps.best_params_['n_neighbors']))