In [12]:
import os
import pandas as pd
import numpy as np
from PIL import Image
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder
import sys

sys.path.append('..')

# Set the path to your images_reshaped directory
base_path = '../images_reshaped'

# Initialize lists to store data
data = []

# Iterate through the directory structure
for category in ['deadly', 'edible', 'poisonous', 'conditionally_edible']:
    category_path = os.path.join(base_path, category)
    for species_folder in os.listdir(category_path):
        species_path = os.path.join(category_path, species_folder)
        if os.path.isdir(species_path):
            for image_file in os.listdir(species_path):
                if image_file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    image_path = os.path.join(species_path, image_file)
                    
                    # Load and preprocess the image
                    img = Image.open(image_path)
                    img_array = np.array(img) / 255.0  # Normalize to [0, 1]
                    
                    data.append({
                        'image_path': image_path,
                        'category': category,
                        'species': species_folder,
                        'image': img_array
                    })

# Create the DataFrame
df = pd.DataFrame(data)

# Add the 'edible' column
df['edible'] = (df['category'] == 'edible').astype(int)

# Encode categories and species
le_category = LabelEncoder()
le_species = LabelEncoder()
df['category_encoded'] = le_category.fit_transform(df['category'])
df['species_encoded'] = le_species.fit_transform(df['species'])

# Save the DataFrame without the 'image' column
df_save = df.drop(columns=['image'])
df_save.to_pickle('mushroom_metadata.pkl')

# Save the image data separately
np.save('mushroom_images.npy', np.stack(df['image'].values))

print("Data preprocessing completed.")
print(f"Metadata saved as 'mushroom_metadata.pkl'.")
print(f"Image data saved as 'mushroom_images.npy'.")
print(f"Total images processed: {len(df)}")
print(f"Number of edible mushrooms: {df['edible'].sum()}")
print(f"Number of non-edible mushrooms: {len(df) - df['edible'].sum()}")
print(f"Number of unique species: {df['species'].nunique()}")
print(f"Image shape: {df['image'].iloc[0].shape}")

Data preprocessing completed.
Metadata saved as 'mushroom_metadata.pkl'.
Image data saved as 'mushroom_images.npy'.
Total images processed: 8345
Number of edible mushrooms: 2433
Number of non-edible mushrooms: 5912
Number of unique species: 247
Image shape: (256, 256, 3)


In [14]:
df.head()

Unnamed: 0,image_path,category,species,image,edible,category_encoded,species_encoded
0,../images_reshaped/deadly/Lepiota_castanea/Lep...,deadly,Lepiota_castanea,"[[[0.2823529411764706, 0.2980392156862745, 0.1...",0,1,177
1,../images_reshaped/deadly/Lepiota_castanea/Lep...,deadly,Lepiota_castanea,"[[[0.5607843137254902, 0.49411764705882355, 0....",0,1,177
2,../images_reshaped/deadly/Lepiota_castanea/Lep...,deadly,Lepiota_castanea,"[[[0.1411764705882353, 0.08627450980392157, 0....",0,1,177
3,../images_reshaped/deadly/Lepiota_castanea/Lep...,deadly,Lepiota_castanea,"[[[0.2, 0.37254901960784315, 0.180392156862745...",0,1,177
4,../images_reshaped/deadly/Lepiota_castanea/Lep...,deadly,Lepiota_castanea,"[[[0.00392156862745098, 0.043137254901960784, ...",0,1,177


In [6]:
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import ResNet50V2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Load the data
images = np.load('mushroom_images.npy')
labels = np.load('mushroom_metadata.pkl', allow_pickle=True)['edible']  # Assume this is a binary array (0 for non-edible, 1 for edible)

# Prepare X and y
X = images
y = labels

# Split the data into train+val and test sets
X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# Split train+val into train and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.2, stratify=y_train_val, random_state=42)

# Define the model building function
def build_model(learning_rate=0.001, dense_units=256):
    base_model = ResNet50V2(weights='imagenet', include_top=False, input_shape=(256, 256, 3))
    
    # Freeze the base model layers
    for layer in base_model.layers:
        layer.trainable = False
    
    x = GlobalAveragePooling2D()(base_model.output)
    x = Dense(dense_units, activation='relu')(x)
    output = Dense(1, activation='sigmoid')(x)
    
    model = Model(inputs=base_model.input, outputs=output)
    
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    
    return model

# Create the model
model = build_model(learning_rate=0.001, dense_units=256)

# Define callbacks
callbacks = [
    EarlyStopping(patience=5, restore_best_weights=True),
    ReduceLROnPlateau(factor=0.5, patience=3)
]

# Train the model
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=32,
    callbacks=callbacks,
)

# Evaluate on the test set
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test accuracy: {test_accuracy:.4f}")

# Save the model
model.save('mushroom_classification_model.h5')

# Function to preprocess a single image for prediction
def preprocess_image(image_path):
    image = tf.keras.preprocessing.image.load_img(image_path, target_size=(256, 256))
    image_array = tf.keras.preprocessing.image.img_to_array(image)
    image_array = np.expand_dims(image_array, axis=0)
    image_array /= 255.0  # Normalize to [0, 1]
    return image_array

# Function to predict on a new image
def predict_mushroom(model, image_path):
    processed_image = preprocess_image(image_path)
    prediction = model.predict(processed_image)
    return "Edible" if prediction[0][0] > 0.5 else "Non-edible"

# Example usage (uncomment to use):
# new_image_path = 'path/to/new/mushroom/image.jpg'
# result = predict_mushroom(model, new_image_path)
# print(f"The mushroom is predicted to be: {result}")

UnpicklingError: Failed to interpret file 'mushroom_metadata.pkl' as a pickle