# Classificação de Cogumelos 🍄

Este notebook aplica algoritmos clássicos de Machine Learning para prever se um cogumelo é comestível ou venenoso com base em atributos visuais e estruturais.

In [None]:
# Imports principais
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from joblib import dump

## 1. Carregamento dos dados

In [None]:
# Direct link to the raw UCI mushroom data file
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"

# Column names in file order; they are supplied here because the file is read
# with header=None.
columns = [
    "class",
    # cap
    "cap-shape", "cap-surface", "cap-color",
    "bruises", "odor",
    # gill
    "gill-attachment", "gill-spacing", "gill-size", "gill-color",
    # stalk
    "stalk-shape", "stalk-root",
    "stalk-surface-above-ring", "stalk-surface-below-ring",
    "stalk-color-above-ring", "stalk-color-below-ring",
    # veil and ring
    "veil-type", "veil-color",
    "ring-number", "ring-type",
    # remaining attributes
    "spore-print-color", "population", "habitat",
]

# Load and clean in one chain:
#   1. '?' entries are turned into NaN,
#   2. every row containing a NaN is removed,
#   3. 'veil-type' is dropped (constant column).
df = (
    pd.read_csv(url, names=columns, header=None)
    .replace('?', np.nan)
    .dropna()
    .drop(columns=['veil-type'])
)

## 2. Pré-processamento e divisão dos dados

In [None]:
# Split the predictors from the target column.
X = df.drop("class", axis=1)
y = df["class"]

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# All predictor columns are treated as categorical and one-hot encoded.
# handle_unknown="ignore" makes categories unseen during fit encode as all
# zeros instead of raising at predict time.
#
# sparse_threshold=0 forces ColumnTransformer to always return a dense array.
# With the default threshold (0.3) the mostly-zero one-hot output is returned
# as a SciPy sparse matrix, which estimators that require dense input
# (GaussianNB in particular) reject with a TypeError when fitted.
categorical_features = X.columns.tolist()
preprocessor = ColumnTransformer(
    [("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features)],
    sparse_threshold=0,
)

## 3. Treinamento dos Modelos

In [None]:
# Candidate classifiers, keyed by the label shown in the results table.
models = {
    "Naive Bayes": GaussianNB(),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "KNN": KNeighborsClassifier(),
    "SVM": SVC(),
}

# Metrics that are computed with 'p' as the positive label, in the order
# they appear as columns after "Accuracy".
positive_label_metrics = {
    "Precision": precision_score,
    "Recall": recall_score,
    "F1 Score": f1_score,
}

results = []
for name, model in models.items():
    # Each candidate is wrapped with the shared preprocessor, fitted on the
    # training split and scored on the held-out test split.
    clf = Pipeline(steps=[("preprocessor", preprocessor), ("classifier", model)])
    y_pred = clf.fit(X_train, y_train).predict(X_test)

    row = {"Model": name, "Accuracy": accuracy_score(y_test, y_pred)}
    for label, metric in positive_label_metrics.items():
        row[label] = metric(y_test, y_pred, pos_label='p')
    results.append(row)

# Last expression of the cell: the comparison table, best F1 score first.
pd.DataFrame(results).sort_values(by="F1 Score", ascending=False)

## 4. Exportação do melhor modelo

In [None]:
# Export a Decision Tree pipeline (as an example): the preprocessor and the
# classifier are fitted together on the training split, then the whole fitted
# pipeline is serialized to disk with joblib.
best_model = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("classifier", DecisionTreeClassifier(random_state=42)),
    ]
).fit(X_train, y_train)

dump(best_model, "mushroom_model.joblib")