In [19]:
# Load the Wine dataset bundled with scikit-learn.
from sklearn.datasets import load_wine
wine = load_wine()  # later cells read .data, .target and .target_names from this object

In [20]:
# Split the loaded dataset into the feature matrix and the class labels.
# The previous `wine.data[:, ]` index selected every row and column, so it is
# dropped: it only obscured the fact that all features are used.
X = wine.data  # features of every sample
y = wine.target  # class label of every sample

In [21]:
# Hold out 20% of the samples for evaluation; the fixed seed keeps the
# partition identical from run to run.
from sklearn.model_selection import train_test_split as Split
X_train, X_test, y_train, y_test = Split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [22]:
# Random Forest baseline.
from sklearn.ensemble import RandomForestClassifier
# Seed the forest so its randomised training repeats on every run; without a
# seed the reported metrics change between executions. The value matches the
# seed already used for the train/test split.
rfc = RandomForestClassifier(random_state=42)
rfc.fit(X_train, y_train)
y_pred = rfc.predict(X_test)  # predicted class for each held-out sample

In [23]:
# Métricas
from sklearn.metrics import accuracy_score, recall_score, precision_score

In [24]:
# Random Forest metrics on the held-out split.
# Predict here instead of reading the shared `y_pred`: the KNN cell reassigns
# that name, so re-running this cell afterwards would silently score the KNN
# predictions and report them as Random Forest results.
rfc_pred = rfc.predict(X_test)
rfc_acc = round(accuracy_score(y_test, rfc_pred), 6)
# 'weighted' averages the per-class scores weighted by each class's support.
rfc_recall = round(recall_score(y_test, rfc_pred, average='weighted'), 6)
rfc_precision = round(precision_score(y_test, rfc_pred, average='weighted'), 6)

In [25]:
# K-nearest-neighbours classifier with k = 3.
from sklearn.neighbors import KNeighborsClassifier
# fit() returns the estimator itself, so construction and training are chained.
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)
# NOTE: this reassigns `y_pred`, replacing the Random Forest predictions.
y_pred = knn.predict(X_test)

In [26]:
# KNN metrics on the held-out split.
# Predict here instead of relying on whatever the shared `y_pred` currently
# holds, so this cell gives KNN numbers regardless of cell execution order.
knn_pred = knn.predict(X_test)
knn_acc = round(accuracy_score(y_test, knn_pred), 6)
# 'weighted' averages the per-class scores weighted by each class's support.
knn_recall = round(recall_score(y_test, knn_pred, average='weighted'), 6)
knn_precision = round(precision_score(y_test, knn_pred, average='weighted'), 6)

In [27]:
# Side-by-side comparison of the held-out metrics (KNN first, Random Forest second).
print('KNN Vs Random Forests')
print(f'Classes: {wine.target_names}')
# Label spelling corrected: "Acurácia" (was "Acurrácia").
print(f'Acurácia: {knn_acc} vs {rfc_acc}')
print(f'Recall: {knn_recall} vs {rfc_recall}')
print(f'Precisão: {knn_precision} vs {rfc_precision}')

KNN Vs Random Forests
Classes: ['class_0' 'class_1' 'class_2']
Acurrácia: 0.805556 vs 1.0
Recall: 0.805556 vs 1.0
Precisão: 0.823148 vs 1.0


In [28]:
# Cross-validate both classifiers on the full dataset (default fold settings).
from sklearn.model_selection import cross_val_score

# One score per fold for each model; Random Forest is evaluated first.
cv_rfc = cross_val_score(estimator=rfc, X=X, y=y)
cv_knn = cross_val_score(estimator=knn, X=X, y=y)
print(f'Validação Cruzada: {cv_rfc} vs {cv_knn}')

Validação Cruzada: [0.97222222 0.94444444 0.97222222 0.97142857 1.        ] vs [0.63888889 0.69444444 0.66666667 0.65714286 0.85714286]


In [29]:
# Mean cross-validated score of the Random Forest, shown as a percentage.
sum_cv_rfc = sum(cv_rfc)

# Divide by the actual number of fold scores instead of a hard-coded 5, so the
# mean stays correct if the number of folds ever changes.
print(f'Resultado Random Forest: {round((sum_cv_rfc/len(cv_rfc)) * 100, 2)}%')

Resultado Random Forest: 97.21%


In [30]:
# Mean cross-validated score of KNN, shown as a percentage.
sum_cv_knn = sum(cv_knn)

# Divide by the actual number of fold scores instead of a hard-coded 5, so the
# mean stays correct if the number of folds ever changes.
print(f'Resultado KNN: {round((sum_cv_knn/len(cv_knn)) * 100, 2)}%')

Resultado KNN: 70.29%


In [31]:
# Buscando hiper parâmetros
from sklearn.model_selection import GridSearchCV

In [32]:
# Random Forest: grid-search `min_samples_split`.
# Each dict value lists the candidate settings to try. The previous tuple
# (2, 6) therefore tested only 2 and 6; enumerate every integer from 2 to 6
# so the search matches the stated intent.
parameters = {'min_samples_split': list(range(2, 7))}
rfc_hps = GridSearchCV(rfc, parameters)  # cross-validated search wrapped around the classifier
rfc_hps.fit(X, y)
print(f'Melhor valor para min_samples_split: {rfc_hps.best_params_["min_samples_split"]}')

Melhor valor para min_samples_split: 6


In [33]:
# KNN: grid-search `n_neighbors`.
# Each dict value lists the candidate settings to try. The previous tuple
# (1, 20) therefore tested only 1 and 20; enumerate every integer from 1 to 20
# so the search matches the stated intent.
parameters = {'n_neighbors': list(range(1, 21))}
knn_hps = GridSearchCV(knn, parameters)  # cross-validated search wrapped around the classifier
knn_hps.fit(X, y)
# The label now names the parameter actually tuned (it previously said
# min_samples_split, copied from the Random Forest cell).
print(f'Melhor valor para n_neighbors: {knn_hps.best_params_["n_neighbors"]}')

Melhor valor para min_samples_split: 1
