In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import pickle

In [None]:
# Read the mushroom CSV from the current working directory.
df = pd.read_csv(filepath_or_buffer="mushrooms.csv")

# Print (rows, columns) so an unexpected file size is visible right away.
dataset_shape = df.shape
print("Dataset shape:", dataset_shape)

# Trailing expression: the notebook renders the first five rows.
df.head(n=5)

In [None]:

# Separate the target column ("class") from the predictor columns.
y = df["class"]
X = df.drop("class", axis=1)

# Encode the target labels as integer codes; the fitted encoder is kept so
# the codes can be mapped back to the original labels (le.classes_).
le = LabelEncoder()
y_enc = le.fit_transform(y)

# Every remaining column is routed through the one-hot encoder.
categorical_cols = X.columns.tolist()

# handle_unknown="ignore": categories not seen during fit are encoded as
# all zeros instead of raising. sparse_output=False: produce a dense array.
one_hot = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
ct = ColumnTransformer([("ohe", one_hot, categorical_cols)])


In [None]:
# Hold out 20% of the rows for evaluation. Stratifying on the encoded target
# keeps the class proportions the same in the train and test partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_enc,
    test_size=0.2,
    stratify=y_enc,
    random_state=42,
)

# Deliberately small tree: depth capped at 3, at least 10 samples per leaf.
tree_clf = DecisionTreeClassifier(max_depth=3, min_samples_leaf=10, random_state=42)

# Chain encoding and classification so the fitted pipeline can be called
# directly on the raw (un-encoded) feature frame.
model = Pipeline(steps=[("ohe", ct), ("dt", tree_clf)])

model.fit(X_train, y_train)



In [None]:
# Score the fitted pipeline on the held-out split.
y_pred = model.predict(X_test)

print("\nAccuracy:", accuracy_score(y_test, y_pred))

# target_names labels the report rows with the original class labels
# instead of their integer codes.
report = classification_report(y_test, y_pred, target_names=le.classes_)
print("\nClassification Report:\n", report)

matrix = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:\n", matrix)

# Persist everything needed to reuse the model: the fitted pipeline, the
# label encoder for decoding predictions, and the list of input columns.
artifact = {
    "model": model,
    "label_encoder": le,
    "features": categorical_cols,
}
with open("mushroom_model.pkl", "wb") as f:
    pickle.dump(artifact, f)

print("\nModel saved as mushroom_model.pkl")