# Imports

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd 
import seaborn as sn

# Getting our datasets

In [None]:
# Number of images per batch for the train/validation loaders.
batch_size = 32
# Spatial size every image is resized to on load; also the MobileNetV2 input size.
image_size = (224, 224)

In [None]:
# Training split, read from data/train; the fixed seed keeps shuffling reproducible.
train = tf.keras.preprocessing.image_dataset_from_directory(
    directory="data/train",
    batch_size=batch_size,
    image_size=image_size,
    seed=1337,
)

In [None]:
# Validation split, read from data/valid with the same image size, batch size and seed.
val = tf.keras.preprocessing.image_dataset_from_directory(
    directory="data/valid",
    batch_size=batch_size,
    image_size=image_size,
    seed=1337,
)

# Data Augmentation

In [None]:
# Random image transformations, applied in this order, bundled into one
# reusable Keras model so they can be called on a batch or placed in a network.
augmentation_layers = [
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
    layers.RandomContrast(0.5),
    layers.RandomZoom(0.3),
]
data_augmentation = keras.Sequential(augmentation_layers)

In [None]:
# Preview the augmentation: take one training batch and draw a 3x3 grid in which
# every cell shows the first image of that batch after a fresh augmentation pass.
plt.figure(figsize=(10, 10))
for sample_batch, _labels in train.take(1):
    for cell in range(1, 10):
        augmented_batch = data_augmentation(sample_batch)
        plt.subplot(3, 3, cell)
        # imshow expects integer pixel values here, hence the uint8 cast.
        plt.imshow(augmented_batch[0].numpy().astype("uint8"))
        plt.axis("off")

# Model

First, we load a pretrained MobileNetV2 model and use it as the base of our classifier.

## MobileNetV2

In [None]:
# MobileNetV2 with ImageNet weights and without its top (classification) layers,
# taking 3-channel images of the size configured in `image_size`.
base_model = tf.keras.applications.MobileNetV2(
    weights="imagenet",
    include_top=False,
    input_shape=(*image_size, 3),
)

In [None]:
# Run a single training batch through the base network to inspect the shape
# of the features it produces.
train_iterator = iter(train)
image_batch, label_batch = next(train_iterator)
feature_batch = base_model(image_batch)
print(feature_batch.shape)

In [None]:
# Freeze the MobileNetV2 base: mark the whole base model as non-trainable so
# that only the layers added on top of it are updated during training.
base_model.trainable = False

In [None]:
# Pooling layer that will sit on top of the base network; it is tried out here
# on the sample feature batch so the resulting shape can be checked.
global_average_layer = layers.GlobalAveragePooling2D()
feature_batch_average = global_average_layer(feature_batch)
print(feature_batch_average.shape)

In [None]:
# MobileNetV2's own input preprocessing function.
# It returns a preprocessed numpy.array or tf.Tensor of type float32, with the
# input pixel values scaled between -1 and 1, sample-wise.
preprocess_input = keras.applications.mobilenet_v2.preprocess_input

In [None]:
# Assemble the classifier: augmentation -> MobileNetV2 preprocessing -> frozen
# base -> pooling -> small dense head.
inputs = keras.Input(shape=(*image_size, 3))
augmented = data_augmentation(inputs)
scaled = preprocess_input(augmented)
# The base is always called in inference mode, independent of the outer model.
features = base_model(scaled, training=False)
pooled = global_average_layer(features)
hidden = layers.Dropout(0.3)(pooled)
hidden = layers.Dense(512, activation="relu")(hidden)
hidden = layers.Dropout(0.5)(hidden)
# 15 output units with no activation: the loss is configured with from_logits=True.
outputs = layers.Dense(15)(hidden)
model = keras.Model(inputs, outputs)

In [None]:
# Print an overview of the assembled model.
model.summary()

In [None]:
# Training configuration: Adam with a small learning rate, and a sparse
# categorical cross-entropy that consumes the raw logits from the final layer.
base_learning_rate = 0.0001
adam_optimizer = tf.keras.optimizers.Adam(learning_rate=base_learning_rate)
logits_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(
    optimizer=adam_optimizer,
    loss=logits_loss,
    metrics=["accuracy"],
)

In [None]:
# Score the model on the validation set before any training; the two returned
# values are the loss and the single compiled metric (accuracy).
loss0, accuracy0 = model.evaluate(val)

In [None]:
# Report the pre-training baseline, rounded to two decimals.
print(f"initial loss: {loss0:.2f}")
print(f"initial accuracy: {accuracy0:.2f}")

# Training

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler

# EarlyStopping: stop training when a monitored metric has stopped improving.
# No `monitor` is passed, so the default (val_loss) is used.
#   min_delta: minimum change in the monitored quantity to qualify as an improvement.
#   patience: how many epochs without improvement are tolerated before stopping.
#   restore_best_weights: ask Keras to go back to the best weights seen
#     (the exact conditions depend on the Keras version - TODO confirm).
# NOTE(review): this run only trains for a handful of epochs, so with
# patience=10 the callback will rarely fire; it matters for longer runs.
EarlyStop_callback = EarlyStopping(
    restore_best_weights=True,
    patience=10,
    min_delta=0.001,
)

In [None]:
# Train the head for up to 12 epochs, validating after each one; the returned
# History object is kept in `history`.
history = model.fit(
    train,
    validation_data=val,
    epochs=12,
    callbacks=[EarlyStop_callback],
)

# Results

In [None]:
# Classify one image from the test folder.
img = keras.preprocessing.image.load_img(
    "data/test/Ladybird Mimic Spider/1.jpg", target_size=image_size
)
img_array = keras.preprocessing.image.img_to_array(img)
img_array = tf.expand_dims(img_array, 0)  # Create batch axis

predictions = model.predict(img_array)
# The final Dense layer has no activation (the loss uses from_logits=True), so
# the raw outputs are logits. Convert them to per-class probabilities, as the
# batch prediction loop does, so that `score` is directly interpretable.
score = tf.nn.softmax(predictions[0]).numpy()
score

In [None]:
# Held-out test split, read from data/test (batch size left at the loader's default).
test = tf.keras.preprocessing.image_dataset_from_directory(
    directory="data/test",
    image_size=image_size,
    seed=1337,
)

# Human-readable label for each class index (position in the list == index).
# NOTE(review): assumed to match the order in which the dataset loaders number
# the class folders - confirm against the datasets' own class names.
class_names = [
    "Black Widow",
    "Blue Tarantula",
    "Bold Jumper",
    "Brown Grass Spider",
    "Brown Recluse Spider",
    "Deinopis Spider",
    "Golden Orb Weaver",
    "Hobo Spider",
    "Huntsman Spider",
    "Ladybird Mimic Spider",
    "Peacock Spider",
    "Red Knee Tarantula",
    "Spiny-backed Orb-weaver",
    "White Kneed Tarantula",
    "Yellow Garden Spider",
]

In [None]:
# Walk over the test set batch by batch and collect, as class-name strings,
# the predicted label (`predictions`) and the true label (`image_ids`) of
# every image, in matching order.
predictions = []
image_ids = []
for image_batch, label_batch in test:
    logits = model.predict_on_batch(image_batch)
    probabilities = tf.nn.softmax(logits).numpy()

    # Most probable class per image, translated to its name.
    predictions.extend(
        str(class_names[best]) for best in np.argmax(probabilities, axis=1)
    )
    # Ground-truth class indices from the dataset, translated the same way.
    image_ids.extend(class_names[label] for label in label_batch.numpy())

In [None]:
# Lift pandas' display limits so the whole table is shown, then pair every
# true label with the corresponding prediction.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
results = pd.DataFrame({"actual": image_ids, "prediction": predictions})
results

In [None]:
# Confusion matrix of true vs. predicted class names, drawn as a heatmap.
df = pd.DataFrame(results, columns=['actual', 'prediction'])
confusion_matrix = pd.crosstab(
    df['actual'], df['prediction'], rownames=['Actual'], colnames=['Predicted']
)
# crosstab only contains labels that actually occur, so a class that is never
# predicted (or never present) would silently drop a column (or row) and shift
# the diagonal. Force the full class list on both axes, filling the gaps with 0.
confusion_matrix = confusion_matrix.reindex(
    index=class_names, columns=class_names, fill_value=0
).rename_axis(index='Actual', columns='Predicted')

# fmt='d' prints the counts as plain integers; the default format would switch
# to scientific notation for counts of 100 or more.
sn.heatmap(confusion_matrix, annot=True, fmt='d', cmap='Greens')
plt.show()

# Credits

Our work is based on the following sources:
https://keras.io/examples/vision/image_classification_from_scratch/
https://www.kaggle.com/pranjalkumarnandi/baseline-with-keras-tf/data
https://www.kaggle.com/enesaltun/spiders-resnet18
https://www.kaggle.com/gpiosenka/inceptionresnetv2-98-acc