# **Image Classification for Mushroom Species**

## **Loading the Datasets**

### **Import TensorFlow and other libraries**

In [None]:
# Imports needed
import numpy as np
import PIL

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

### **Load in the dataset**

In [None]:
import pathlib

# Root folder of the training images; each sub-folder holds one species.
dataset_directory = pathlib.Path("MushroomDatasets")

# Count the JPEG files that sit exactly one level below the root folder.
jpeg_paths = dataset_directory.glob('*/*.jpg')
image_count = sum(1 for _ in jpeg_paths)
print(image_count)

### **Some Examples of Images in the Dataset**

Agaricus bisporus (J.E.Lange) Imbach:

In [None]:
# Gather every file in this species' folder and display the first one.
agaricus_bisporus_dir = dataset_directory / 'Agaricus bisporus (J.E.Lange) Imbach'
agaricus_bisporus = list(agaricus_bisporus_dir.glob('*'))
PIL.Image.open(str(agaricus_bisporus[0]))

Hericium erinaceus (Bull.) Pers:

In [None]:
# Gather every file in this species' folder and display the first one.
hericium_erinaceus_dir = dataset_directory / 'Hericium erinaceus (Bull.) Pers'
hericium_erinaceus = list(hericium_erinaceus_dir.glob('*'))
PIL.Image.open(str(hericium_erinaceus[0]))

Suillus luteus (L.) Roussel:

In [None]:
# Gather every file in this species' folder and display the first one.
suillus_luteus_dir = dataset_directory / 'Suillus luteus (L.) Roussel'
suillus_luteus = list(suillus_luteus_dir.glob('*'))
PIL.Image.open(str(suillus_luteus[0]))

## **Load Data Using a Keras Utility**

### **Creating the Dataset**

In [None]:
# Number of images per batch, and the square resolution (in pixels) that
# every image is resized to when the datasets are created.
batch_size = 16
img_height = img_width = 224

In [None]:
# Training split: 80% of the images under `dataset_directory`.
# Labels are inferred from the directory structure and encoded as integers.
# The validation dataset must be created with the same `validation_split`
# and `seed` so that the two subsets do not overlap.
train_ds = tf.keras.utils.image_dataset_from_directory(
    dataset_directory,
    labels="inferred",
    label_mode="int",
    validation_split=0.2,
    subset="training",
    seed=123,
    # `shuffle` is a boolean flag. The string "True" only worked because any
    # non-empty string is truthy ("False" would have shuffled as well).
    shuffle=True,
    image_size=(img_height, img_width),
    batch_size=batch_size)

In [None]:
# Validation split: the remaining 20% of the images under `dataset_directory`.
# `validation_split` and `seed` must match the values used for the training
# dataset so that the two subsets do not overlap.
val_ds = tf.keras.utils.image_dataset_from_directory(
    dataset_directory,
    labels="inferred",
    label_mode="int",
    validation_split=0.2,
    subset="validation",
    seed=123,
    # `shuffle` is a boolean flag. The string "True" only worked because any
    # non-empty string is truthy ("False" would have shuffled as well).
    shuffle=True,
    image_size=(img_height, img_width),
    batch_size=batch_size)

In [None]:
# Class labels exposed by the training dataset. They are used later to size
# the model's output layer and to label plots and predictions.
# NOTE: this is read here, before `train_ds` is re-assigned to the
# cached/prefetched pipeline further down the notebook.
class_names = train_ds.class_names
print(class_names)

### **Visualize the Data**

In [None]:
import matplotlib.pyplot as plt

# Show the first nine images of one training batch in a 3x3 grid,
# each titled with its class name.
preview_fig = plt.figure(figsize=(20, 20))
first_batch = train_ds.take(1)
for images, labels in first_batch:
    for i in range(9):
        ax = preview_fig.add_subplot(3, 3, i + 1)
        # Pixel values are floats; cast to uint8 so imshow renders them as 0-255 RGB.
        pixels = images[i].numpy().astype("uint8")
        ax.imshow(pixels)
        ax.set_title(class_names[labels[i]])
        ax.axis("off")

In [None]:
# Inspect a single batch: print the shape of the image tensor and of the
# label tensor, then stop.
for image_batch, labels_batch in train_ds.take(1):
    for batch_tensor in (image_batch, labels_batch):
        print(batch_tensor.shape)

### **Configuring the Dataset for Performance**

In [None]:
# Let tf.data pick the prefetch buffer size.
AUTOTUNE = tf.data.AUTOTUNE

# Training pipeline: cache, reshuffle with a 1000-element buffer, then prefetch.
train_ds = (
    train_ds
    .cache()
    .shuffle(1000)
    .prefetch(buffer_size=AUTOTUNE)
)

# Validation pipeline: cache and prefetch only (no shuffling).
val_ds = (
    val_ds
    .cache()
    .prefetch(buffer_size=AUTOTUNE)
)

### **Standardize the Data**

In [None]:
# Layer that multiplies pixel values by 1/255, mapping [0, 255] to [0, 1].
normalization_layer = layers.Rescaling(scale=1. / 255)

In [None]:
def _rescale_batch(images, labels):
    """Apply `normalization_layer` to the images; pass the labels through unchanged."""
    return normalization_layer(images), labels


normalized_ds = train_ds.map(_rescale_batch)
image_batch, labels_batch = next(iter(normalized_ds))
first_image = image_batch[0]
# Notice the pixel values are now in `[0,1]`.
lowest_pixel = np.min(first_image)
highest_pixel = np.max(first_image)
print(lowest_pixel, highest_pixel)

### **Augment the Data**

In [None]:
# Random augmentations, in order: horizontal/vertical flips, rotation, zoom.
# The first layer declares the expected input shape (height, width, 3 channels).
augmentation_input_shape = (img_height, img_width, 3)
augmentation_layers = [
    layers.RandomFlip("horizontal_and_vertical", input_shape=augmentation_input_shape),
    layers.RandomRotation(0.2),
    layers.RandomZoom(0.2),
]
data_augmentation = keras.Sequential(augmentation_layers)

In [None]:
# Run the augmentation pipeline nine times on the same batch and show the
# first image of each result, to illustrate the variety it produces.
augmentation_fig = plt.figure(figsize=(20, 20))
for images, _ in train_ds.take(1):
    for i in range(9):
        augmented_images = data_augmentation(images)
        ax = augmentation_fig.add_subplot(3, 3, i + 1)
        ax.imshow(augmented_images[0].numpy().astype("uint8"))
        ax.axis("off")

## **Training the Model**

### **Create the Model**

In [None]:
# One output unit per mushroom class.
num_classes = len(class_names)

# Small CNN: augmentation -> rescaling -> three Conv/MaxPool stages ->
# dropout -> dense head. The final Dense layer has no activation, so the
# model outputs raw logits.
model = Sequential()
model.add(data_augmentation)
model.add(layers.Rescaling(1./255))
for conv_filters in (16, 32, 64):
    model.add(layers.Conv2D(conv_filters, 3, padding='same', activation='relu'))
    model.add(layers.MaxPooling2D())
model.add(layers.Dropout(0.2))
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(num_classes))

### **Compile the Model**

In [None]:
# Adam optimizer with a learning rate of 1e-4.
adam_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)

# Labels are integer class ids and the model emits logits, hence
# sparse categorical cross-entropy with from_logits=True.
logits_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

model.compile(
    optimizer=adam_optimizer,
    loss=logits_loss,
    metrics=['accuracy'],
)

### **Model Summary**

In [None]:
# Print the summary of the compiled model.
model.summary()

### **Define the Callback**

In [None]:
# Import the callbacks from `tensorflow.keras`, the same package the model and
# layers come from. Mixing standalone `keras` with `tensorflow.keras` can pair
# incompatible Keras implementations.
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

# Save the full model (not only the weights) to "Model.h5" at the end of an
# epoch, but only when validation accuracy improves on the best value so far.
checkpoint = ModelCheckpoint("Model.h5", monitor='val_accuracy', verbose=1, save_best_only=True, save_weights_only=False, mode='auto', save_freq="epoch")

# Stop training when validation accuracy has not improved for 40 epochs.
early = EarlyStopping(monitor='val_accuracy', min_delta=0, patience=40, verbose=1, mode='auto')

# Multiply the learning rate by 0.1 when validation accuracy has not improved
# for 2 epochs, then wait 2 cooldown epochs before monitoring again.
reduce_lr = ReduceLROnPlateau(monitor='val_accuracy', factor=0.1,
                              mode='max', cooldown=2, patience=2, min_lr=0)

### **Train the Model**

In [None]:
# Upper bound on the number of training epochs; the early-stopping callback
# may end the run sooner.
epochs = 50

training_callbacks = [checkpoint, early, reduce_lr]

# `history` records the per-epoch metrics used for the plots below.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=epochs,
    callbacks=training_callbacks,
)

### **Visualize Training Results**

In [None]:
# Per-epoch metrics recorded by model.fit().
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Use the number of epochs that actually ran. EarlyStopping can end training
# before the configured epoch count is reached, and plotting a longer x-range
# against a shorter history raises a dimension-mismatch error.
epochs_range = range(len(acc))

plt.figure(figsize=(8, 8))

# Left panel: accuracy curves.
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
# A subplot has a single title; a second plt.title() call would overwrite the first.
plt.title('Training and Validation Accuracy')
plt.xlabel('Epoch')

# Right panel: loss curves.
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.show()

## **Predict on New Data**

In [None]:
# Folder with unlabeled JPEG images to classify.
testing_directory = pathlib.Path("TestingDatasets")

# Sort the paths so the predictions are printed in a stable, reproducible order
# (glob itself makes no ordering guarantee).
testing_files = sorted(testing_directory.glob('*.jpg'))

for file in testing_files:
    # Load and resize to the resolution the model was trained on. Uses
    # tf.keras.utils, the same namespace as img_to_array below, rather than
    # the deprecated tf.keras.preprocessing.image module.
    image = tf.keras.utils.load_img(file, target_size=(img_height, img_width))

    image_array = tf.keras.utils.img_to_array(image)
    image_array = tf.expand_dims(image_array, 0)  # Create a batch

    predictions = model.predict(image_array)
    # The model outputs logits (its last Dense layer has no activation), so
    # apply softmax to turn them into class probabilities.
    score = tf.nn.softmax(predictions[0])

    print(
        "This {} most likely belongs to {} with a {:.2f} percent confidence."
        .format(file, class_names[np.argmax(score)], 100 * np.max(score))
    )