In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Load the seeds dataset from CSV (path is relative to the notebook's working directory).
df = pd.read_csv('../dados/seeds_tratado.csv')

# Separate the target column ('Class') from the predictor columns.
y = df['Class']
X = df.drop(columns='Class')

# Encode the class labels as integer codes; `le` is kept so later cells can
# recover the original class names through `le.classes_`.
le = LabelEncoder().fit(y)
y_encoded = le.transform(y)

# 70/30 hold-out split, stratified on the encoded target and seeded for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.3,
    random_state=42,
    stratify=y_encoded,
)

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Grid search for KNN: tune the number of neighbours (1..20) with 5-fold
# cross-validation on the training split only.
# NOTE(review): KNN is distance-based and no feature-scaling step is visible in
# this notebook — confirm that seeds_tratado.csv already has comparable scales.
param_knn = {'n_neighbors': list(range(1, 21))}
grid_knn = GridSearchCV(KNeighborsClassifier(), param_knn, cv=5)
grid_knn.fit(X_train, y_train)

# Evaluate the best estimator found by the search on the held-out test split.
best_knn = grid_knn.best_estimator_
y_pred_knn = best_knn.predict(X_test)

# classification_report calls len() on every target name, so numeric class
# labels (e.g. 1, 2, 3) would raise TypeError. Converting to str is a no-op
# when the labels are already strings.
class_names = [str(label) for label in le.classes_]

print("Melhores parâmetros KNN:", grid_knn.best_params_)
print("Acurácia:", accuracy_score(y_test, y_pred_knn))
print("Relatório de Classificação:")
print(classification_report(y_test, y_pred_knn, target_names=class_names))

# Confusion matrix: rows are the true classes, columns the predicted classes.
fig, ax = plt.subplots()
sns.heatmap(
    confusion_matrix(y_test, y_pred_knn),
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_title("Matriz de Confusão - KNN Otimizado")
ax.set_xlabel("Previsto")
ax.set_ylabel("Real")
plt.show()

In [None]:
# Grid search for SVM: tune the regularisation strength C and the kernel type
# with 5-fold cross-validation on the training split only.
# NOTE(review): SVMs are sensitive to feature scale and no scaling step is
# visible in this notebook — confirm that seeds_tratado.csv is already scaled.
param_svm = {'C': [0.1, 1, 10, 100], 'kernel': ['linear', 'rbf', 'poly']}
grid_svm = GridSearchCV(SVC(), param_svm, cv=5)
grid_svm.fit(X_train, y_train)

# Evaluate the best estimator found by the search on the held-out test split.
best_svm = grid_svm.best_estimator_
y_pred_svm = best_svm.predict(X_test)

# classification_report calls len() on every target name, so numeric class
# labels (e.g. 1, 2, 3) would raise TypeError. Converting to str is a no-op
# when the labels are already strings.
class_names = [str(label) for label in le.classes_]

print("Melhores parâmetros SVM:", grid_svm.best_params_)
print("Acurácia:", accuracy_score(y_test, y_pred_svm))
print("Relatório de Classificação:")
print(classification_report(y_test, y_pred_svm, target_names=class_names))

# Confusion matrix: rows are the true classes, columns the predicted classes.
fig, ax = plt.subplots()
sns.heatmap(
    confusion_matrix(y_test, y_pred_svm),
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_title("Matriz de Confusão - SVM Otimizado")
ax.set_xlabel("Previsto")
ax.set_ylabel("Real")
plt.show()

In [None]:
# Grid search for Random Forest: tune the number of trees and the maximum tree
# depth with 5-fold cross-validation on the training split only. The forest is
# seeded (random_state=42) so repeated runs give the same result.
param_rf = {
    'n_estimators': [50, 100, 150],
    'max_depth': [3, 5, 10, None]
}
grid_rf = GridSearchCV(RandomForestClassifier(random_state=42), param_rf, cv=5)
grid_rf.fit(X_train, y_train)

# Evaluate the best estimator found by the search on the held-out test split.
best_rf = grid_rf.best_estimator_
y_pred_rf = best_rf.predict(X_test)

# classification_report calls len() on every target name, so numeric class
# labels (e.g. 1, 2, 3) would raise TypeError. Converting to str is a no-op
# when the labels are already strings.
class_names = [str(label) for label in le.classes_]

print("Melhores parâmetros Random Forest:", grid_rf.best_params_)
print("Acurácia:", accuracy_score(y_test, y_pred_rf))
print("Relatório de Classificação:")
print(classification_report(y_test, y_pred_rf, target_names=class_names))

# Confusion matrix: rows are the true classes, columns the predicted classes.
fig, ax = plt.subplots()
sns.heatmap(
    confusion_matrix(y_test, y_pred_rf),
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_title("Matriz de Confusão - Random Forest Otimizado")
ax.set_xlabel("Previsto")
ax.set_ylabel("Real")
plt.show()