In [16]:
# --- 1. Imports ---
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix, classification_report
import joblib
from tensorflow import keras
from tensorflow.keras import layers


In [17]:
# --- 2. Load Dataset ---
# Assuming you have mushroom.csv with columns matching UCI dataset
data = pd.read_csv('agaricus-lepiota.csv', header=None)

# Assign column names from agaricus-lepiota.names file "Mushroom Database"
column_names = [
    'class', 'cap-shape', 'cap-surface', 'cap-color', 'bruises', 'odor',
    'gill-attachment', 'gill-spacing', 'gill-size', 'gill-color',
    'stalk-shape', 'stalk-root', 'stalk-surface-above-ring',
    'stalk-surface-below-ring', 'stalk-color-above-ring',
    'stalk-color-below-ring', 'veil-type', 'veil-color', 'ring-number',
    'ring-type', 'spore-print-color', 'population', 'habitat'
]
data.columns = column_names

# Target column: 'class' (e=edible, p=poisonous)
X_raw = data.drop("class", axis=1)
y_raw = data["class"]



In [18]:
# --- 3. Encode Labels ---
# Convert target to numeric (edible=1, poisonous=0)
y = np.where(y_raw == "e", 1, 0)


In [19]:
# --- 4. OneHot Encode Features ---
encoder = OneHotEncoder(handle_unknown="ignore")
X_encoded = encoder.fit_transform(X_raw)



In [20]:
# --- 5. Apply PCA ---
pca = PCA(n_components=40)   # adjust components if needed
X_pca = pca.fit_transform(X_encoded.toarray())


In [21]:

# --- 6. Save Preprocessing Objects ---
joblib.dump(encoder, "encoder.pkl")
joblib.dump(pca, "pca.pkl")



['pca.pkl']

In [22]:
# --- 7. Train/Test Split ---
X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)


In [23]:
# --- 8. Build Neural Network ---
model = keras.Sequential([
    layers.Dense(64, activation="relu", input_shape=(X_train.shape[1],)),
    layers.Dense(32, activation="relu"),
    layers.Dense(1, activation="sigmoid")
])

model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [24]:
# --- 9. Train Model ---
history = model.fit(
    X_train, y_train,
    epochs=20,
    batch_size=32,
    validation_split=0.2,
    verbose=1
)


Epoch 1/20
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9331 - loss: 0.2311 - val_accuracy: 0.9838 - val_loss: 0.0444
Epoch 2/20
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9954 - loss: 0.0220 - val_accuracy: 0.9985 - val_loss: 0.0117
Epoch 3/20
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9998 - loss: 0.0065 - val_accuracy: 1.0000 - val_loss: 0.0048
Epoch 4/20
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 1.0000 - loss: 0.0028 - val_accuracy: 1.0000 - val_loss: 0.0027
Epoch 5/20
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 1.0000 - loss: 0.0015 - val_accuracy: 1.0000 - val_loss: 0.0015
Epoch 6/20
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 1.0000 - loss: 9.0790e-04 - val_accuracy: 1.0000 - val_loss: 9.7987e-04
Epoch 7/20
[1m163/1

In [25]:

# --- 10. Evaluate ---
y_pred = (model.predict(X_test) > 0.5).astype("int32")
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))


[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[[782   0]
 [  0 843]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       782
           1       1.00      1.00      1.00       843

    accuracy                           1.00      1625
   macro avg       1.00      1.00      1.00      1625
weighted avg       1.00      1.00      1.00      1625



In [26]:

# --- 11. Save Model ---
model.save("pca_mushroom_model.h5")


