In [None]:
# Install gdown into the kernel's conda environment; it is imported below and
# used to fetch the dataset archives from Google Drive.
%conda install -y gdown

In [None]:
from IPython.display import FileLinks, FileLink
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Rescaling, Conv2D, MaxPooling2D, Dropout, Flatten, Dense
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.utils import plot_model, load_img, img_to_array
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.config import list_physical_devices
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

import gdown
import zipfile
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import os

In [None]:
# Google Drive file id of the zip archive for each image class
# (the keys match the entries of class_names defined further down).
archive_file_ids = {
    'amanita': '1Oc_O7dtjeGtfApX9Qv15AxUqVyzAssHD',
    'boletus': '1CUsxZERiIvV9BHnaSxMyhfjP6jLhtbVP',
    'cantharellus': '11ICEn_OFhwd78mUemOYb146WcqFIhlv8',
    'morchella': '18I07wDMnJOG1oADg2fFR4jVFiIqgPG3r',
}

# Download each archive as <class>.zip into the current working directory.
for archive_name, file_id in archive_file_ids.items():
    url = 'https://drive.google.com/uc?id={}'.format(file_id)
    output = '{}.zip'.format(archive_name)
    # Some gdown releases signal a failed download by returning None instead
    # of raising; stop here rather than failing later during extraction.
    if gdown.download(url, output) is None:
        raise RuntimeError('Download failed for {} ({})'.format(output, url))

In [None]:
# Unpack every downloaded archive into one shared dataset directory.
dataset_dir = "/kaggle/working/data/dataset_augmented"

for archive_name in ("amanita", "boletus", "cantharellus", "morchella"):
    archive_path = "/kaggle/working/{}.zip".format(archive_name)
    # The context manager closes the archive even when extraction raises.
    with zipfile.ZipFile(archive_path, 'r') as zip_ref:
        zip_ref.extractall(dataset_dir)

In [None]:
# Inspect the extraction root; the following cells read its 'test' and 'train' subfolders.
os.listdir('/kaggle/working/data/dataset_augmented')

In [None]:
# Show the entries of the test folder (test loading later reads one subfolder per class name).
os.listdir('/kaggle/working/data/dataset_augmented/test')

In [None]:
# Show the entries of the train folder (the source of the training/validation split).
os.listdir('/kaggle/working/data/dataset_augmented/train')

In [None]:
# Training hyperparameters
batch_size = 64  # images per batch for dataset loading and model.evaluate
img_height = 224  # every image is resized to img_height x img_width when loaded
img_width = 224
epochs = 100  # upper bound passed to model.fit; early stopping may end training sooner
seed = 27  # seed passed to both dataset loaders (shuffling / validation split)
validation_split = 0.2  # fraction of the train folder reserved for validation

# Dataset locations and the file the trained model is saved to
training_data_path = '/kaggle/working/data/dataset_augmented/train'
test_data_path = '/kaggle/working/data/dataset_augmented/test'
model_path = '/kaggle/working/models/final_models/vgg16.h5'

# The position of a name in this list is the integer label used for that class
class_names = ['amanita', 'boletus', 'cantharellus', 'morchella']

In [None]:
# Report how many GPU devices TensorFlow detects.
print("Num GPUs Available: ", len(list_physical_devices('GPU')))

## Training and validation data

In [None]:
# Keyword arguments common to both loaders: same folder, split fraction, seed,
# label encoding and image size, so the two calls differ only in `subset`.
dataset_options = dict(
    directory=training_data_path,
    validation_split=validation_split,
    labels='inferred',
    class_names=class_names,
    label_mode='int',
    batch_size=batch_size,
    image_size=(img_height, img_width),
    seed=seed,
    shuffle=True,
)

# Larger part of the split, used for fitting the model
training_data = image_dataset_from_directory(subset='training', **dataset_options)

# Held-out part of the split, used for validation during training
validation_data = image_dataset_from_directory(subset='validation', **dataset_options)

## VGG16 architecture

In [None]:
# VGG16-style architecture: a rescaling layer, five convolutional blocks and
# three fully connected layers. Dropout follows every pooling layer and every
# hidden fully connected layer.
def _vgg_conv_block(block_index, filters, conv_count, dropout_rate=0.2):
    """Build one CONV x conv_count -> POOL -> DROPOUT block.

    Args:
        block_index: 1-based block number; used in the layer names
            ('conv2d_<block>_<i>', 'maxpool2d_<block>', 'dropout_<block>').
        filters: Number of filters of every convolution in the block.
        conv_count: How many 3x3 'same'-padded ReLU convolutions to stack.
        dropout_rate: Dropout rate applied after the pooling layer.

    Returns:
        List of Keras layers in the order they must be applied.
    """
    block_layers = [
        Conv2D(
            filters=filters,
            kernel_size=(3, 3),
            padding='same',
            activation='relu',
            name='conv2d_{}_{}'.format(block_index, conv_index)
        )
        for conv_index in range(1, conv_count + 1)
    ]
    block_layers.append(
        MaxPooling2D(
            pool_size=(2, 2),
            strides=(2, 2),
            name='maxpool2d_{}'.format(block_index)
        )
    )
    block_layers.append(
        Dropout(
            rate=dropout_rate,
            name='dropout_{}'.format(block_index)
        )
    )
    return block_layers


# (filters, number of convolutions) for blocks 1 to 5
vgg16_blocks = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

# Rescaling layer: multiplies the inputs by 1/255
model_layers = [
    Rescaling(
        scale=1. / 255,
        input_shape=(img_height, img_width, 3),
        name='rescale'
    )
]

# Convolutional blocks
for block_index, (filters, conv_count) in enumerate(vgg16_blocks, start=1):
    model_layers.extend(_vgg_conv_block(block_index, filters, conv_count))

# Flattening -> FC -> FC -> FC
model_layers.extend([
    Flatten(name='flatten'),
    Dense(units=4096, activation='relu', name='fc_1'),
    Dropout(rate=0.5, name='dropout_6'),
    Dense(units=4096, activation='relu', name='fc_2'),
    Dropout(rate=0.5, name='dropout_7'),
    Dense(
        units=len(class_names),  # number of classes
        activation='softmax',
        name='fc_output'
    ),
])

model = Sequential(model_layers)

### Compile the model and plot its architecture

In [None]:
# Build the optimizer and loss up front so their settings are easy to spot.
optimizer = SGD(learning_rate=0.001, momentum=0.9)
loss_function = SparseCategoricalCrossentropy()  # labels are integer class indices

model.compile(optimizer=optimizer, loss=loss_function, metrics=['accuracy'])

# Write the layer diagram to vgg16_plot.png; as the last expression of the
# cell, the returned object is also displayed in the notebook.
plot_model(model, to_file='vgg16_plot.png', show_shapes=True, show_layer_names=True)

### Define callbacks

In [None]:
# Stop training once the validation loss has not improved for 10 consecutive
# epochs, and roll the model back to the weights of the best epoch.
early_stopping_callback = EarlyStopping(
    monitor='val_loss', mode='min', patience=10,
    restore_best_weights=True, verbose=1
)

# Callback list handed to model.fit
callbacks = [early_stopping_callback]

### Train the model

In [None]:
# Train for at most `epochs` epochs; the early-stopping callback may end the
# run sooner. No `shuffle` argument is passed: Keras ignores it when the input
# is a tf.data.Dataset, and shuffling is already configured where
# training_data is built.
history = model.fit(
    training_data,
    validation_data=validation_data,
    epochs=epochs,
    callbacks=callbacks
)

### Save the model

In [None]:
# Persist the trained model to model_path (the vgg16.h5 file configured above).
model.save(model_path)

### Load training metrics into a DataFrame

In [None]:
# Tabulate the metrics recorded by model.fit; the plots below read the
# accuracy / val_accuracy and loss / val_loss columns.
metrics_df = pd.DataFrame(history.history)

### Accuracy plot

In [None]:
# Training vs. validation accuracy; each row of metrics_df is one epoch.
ax = metrics_df[["accuracy", "val_accuracy"]].plot()
ax.set_title('Training and Validation Accuracy')
# Axis labels let the saved figure be read on its own
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
plt.savefig('accuracy.png')
plt.show()

### Loss plot

In [None]:
# Training vs. validation loss; each row of metrics_df is one epoch.
ax = metrics_df[["loss", "val_loss"]].plot()
ax.set_title('Training and Validation Loss')
# Axis labels let the saved figure be read on its own
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
plt.savefig('loss.png')
plt.show()

## Test the model

### Define prediction function

In [None]:
def predict_and_evaluate(images, labels, class_names, classifier=None):
    """Classify a batch of images, print each result and compute the accuracy.

    Args:
        images: Sequence of equally shaped image arrays; they are stacked
            into one batch with ``np.array``.
        labels: Sequence of true class indices, one per image.
        class_names: Sequence mapping a class index to its display name.
        classifier: Object exposing ``predict(batch)`` that returns one row
            of per-class scores per image. Defaults to the notebook-level
            ``model``.

    Returns:
        Tuple ``(accuracy, predicted_labels)`` where ``accuracy`` is the
        percentage of images whose predicted index equals its label and
        ``predicted_labels`` is the array of argmax indices.

    Raises:
        ValueError: If ``images`` is empty or its length differs from
            ``labels``.
    """
    if len(images) == 0:
        raise ValueError("predict_and_evaluate needs at least one image")
    if len(images) != len(labels):
        # zip() below would silently drop the surplus and skew the accuracy
        raise ValueError(
            "images and labels differ in length ({} vs {})".format(
                len(images), len(labels)
            )
        )

    # Fall back to the global model only when no classifier is supplied
    classifier = model if classifier is None else classifier

    test_data_batch = np.array(images)
    print(test_data_batch.shape)

    predictions = classifier.predict(test_data_batch)
    predicted_labels = np.argmax(predictions, axis=1)

    for predicted_label, prediction, label in zip(predicted_labels, predictions, labels):
        probability = np.max(prediction) * 100
        print(
            "{} with {:.2f}% probability (real class: {})".format(
                class_names[predicted_label],
                probability,
                class_names[label]
            )
        )

    # Count matches in one vectorized comparison
    hits = int(np.count_nonzero(predicted_labels == np.asarray(labels)))
    accuracy = (hits / len(labels)) * 100
    return accuracy, predicted_labels
        

### Load test data

In [None]:
# Load every test image into memory together with its integer class label.
images = list()
labels = list()

# enumerate yields each class's label (its position in class_names) directly
for class_index, class_name in enumerate(class_names):
    class_path = os.path.join(test_data_path, class_name)
    # os.listdir returns entries in arbitrary order; sorting keeps the image
    # order, and therefore the per-image prediction output, stable across runs
    for image in sorted(os.listdir(class_path)):
        image_path = os.path.join(class_path, image)
        # Resize to the model's input size while loading
        image_data = load_img(
            image_path,
            target_size=(img_height, img_width),
            color_mode='rgb'
        )
        image_array = img_to_array(image_data)
        images.append(image_array)
        labels.append(class_index)


### Run prediction

In [None]:
# Classify the loaded test images; predicted_labels is reused by the
# classification report and confusion matrix cells below.
accuracy, predicted_labels = predict_and_evaluate(images, labels, class_names)
print("Accuracy is {:.2f}%".format(accuracy))

In [None]:
# Score the same test set through Keras' own evaluation loop.
evaluation = model.evaluate(x=np.array(images), y=np.array(labels), batch_size=batch_size)

# The first two entries are the loss and the accuracy metric chosen at compile time
test_loss, test_accuracy = evaluation[:2]
print("Loss is {:.4f}".format(test_loss))
print("Accuracy is {:.2f}%".format(100 * test_accuracy))

In [None]:
# Per-class metrics for the test set, with rows named after class_names.
print(classification_report(labels, predicted_labels, target_names=class_names))

### Confusion matrix

In [None]:
# List every class index explicitly so the matrix always has one row and one
# column per entry of class_names, even if a class never occurs.
matrix = confusion_matrix(
    labels,
    predicted_labels,
    labels=list(range(len(class_names)))
)

sns.heatmap(
    matrix,
    square=True,
    annot=True,
    fmt='d',  # integer counts; the default '.2g' shows 100+ as e.g. 1.2e+02
    cbar=False,
    cmap=plt.cm.Blues,
    xticklabels=class_names,
    yticklabels=class_names
)

plt.xlabel('Predicted Classes')
plt.ylabel('True Classes')
plt.title('Confusion Matrix')
plt.savefig('confusion_matrix.png')
plt.show()

### Download the model and plots

In [None]:
# Switch to the working directory so the relative paths in the link cells below resolve against it.
os.chdir('/kaggle/working')

In [None]:
# Display the current working directory to confirm the change above.
os.getcwd()

In [None]:
# Links to the files under ./models, the directory tree that model_path points into.
FileLinks('./models')

In [None]:
# Link to the architecture diagram written by plot_model.
FileLink('vgg16_plot.png')

In [None]:
# Link to the saved accuracy plot.
FileLink('accuracy.png')

In [None]:
# Link to the saved loss plot.
FileLink('loss.png')

In [None]:
# Link to the saved confusion matrix figure.
FileLink('confusion_matrix.png')