## Data Loading

In [98]:
import tensorflow as tf

# Loader configuration: where the images live, the size they are resized to,
# and how many of them go into one batch.
DATASET_DIR = "../input/animals10/raw-img"
IMG_HEIGHT = IMG_WIDTH = 96
BATCH_SIZE = 32

In [99]:
# Build a batched image dataset from DATASET_DIR; labels are inferred by the
# loader and every image is resized to (IMG_HEIGHT, IMG_WIDTH).
animals_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    directory=DATASET_DIR,
    labels='inferred',
    image_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    seed=37,
)

Found 26179 files belonging to 10 classes.


## Data Exploration

In [100]:
import pandas as pd

In [101]:
def countTuplesByClass(x,y):
    """Print how many samples of each class a batched dataset contains.

    Args:
        x: Iterable of ``(images, labels)`` batches whose ``labels`` expose
            ``.numpy()`` returning integer class indices.
        y: Sequence of class names; position ``i`` names class index ``i``.

    Prints one ``name<TAB><TAB>count`` line per entry of ``y``, in index
    order. A class that never occurs in ``x`` is reported with a count of 0.
    Returns ``None``.
    """
    labels = []
    # Consume the dataset one batch at a time and keep only the labels; the
    # image tensors are dropped as soon as each batch has been read.
    for _, batch_labels in x:
        labels.extend(batch_labels.numpy().ravel().tolist())

    # Reindex over every class index so that classes missing from this
    # dataset show up as 0 instead of raising a KeyError on lookup.
    counts = (
        pd.Series(labels, dtype="int64")
        .value_counts()
        .reindex(range(len(y)), fill_value=0)
    )

    for i, name in enumerate(y):
        print(name + "\t\t" + str(counts.loc[i]))

In [102]:
# Class names as reported by the loaded dataset; used later to label the
# per-class counts and the preview plot titles.
animals_names = animals_dataset.class_names
animals_names

['cane',
 'cavallo',
 'elefante',
 'farfalla',
 'gallina',
 'gatto',
 'mucca',
 'pecora',
 'ragno',
 'scoiattolo']

In [103]:
# Print the number of images per class over the whole dataset.
countTuplesByClass(animals_dataset,animals_names)

KeyboardInterrupt: 

In [None]:
# Dataset restricted to its first element (one batch); as the last expression
# of the cell only its repr is shown.
animals_dataset.take(1)

In [None]:
import matplotlib.pyplot as plt

# Preview the first batch: up to 30 images placed on an 8x10 subplot grid,
# each titled with its class name.
plt.figure(figsize=(25, 28))

for images, labels in animals_dataset.take(1):
    # Never index past the end of the batch: it can hold fewer than 30 images.
    for i in range(min(30, images.shape[0])):
        plt.subplot(8, 10, i + 1)
        # Cast to uint8 so imshow treats the values as 0-255 pixel intensities.
        plt.imshow(images[i].numpy().astype("uint8"))
        plt.title(animals_names[labels[i]])

## Data Transformation

In [None]:
# A function that splits a dataset into training, validation and test partitions.
def get_dataset_partitions_tf(ds, ds_size, train_split=0.8, val_split=0.1, test_split=0.1, shuffle=True, shuffle_size=10000):
    """Split ``ds`` into consecutive train / validation / test partitions.

    Args:
        ds: Dataset exposing ``shuffle``, ``take`` and ``skip``.
        ds_size: Number of elements in ``ds`` (batches, if ``ds`` is batched).
        train_split, val_split, test_split: Fractions that must sum to 1.
        shuffle: Whether to shuffle ``ds`` before splitting.
        shuffle_size: Buffer size passed to ``ds.shuffle``.

    Returns:
        ``(val_ds, test_ds, train_ds)`` -- note the order. The training part
        is the first ``int(train_split * ds_size)`` elements, validation the
        next ``int(val_split * ds_size)``, and test everything that remains.

    Raises:
        AssertionError: If the three fractions do not sum to 1.
    """
    # Compare with a tolerance: 0.7 + 0.1 + 0.2, for example, is not exactly
    # 1.0 in floating point and would fail an exact equality check.
    assert abs((train_split + test_split + val_split) - 1) < 1e-9

    if shuffle:
        # Fixed seed gives the same split between runs. Reshuffling is turned
        # off so the order is also the same on every pass over the data;
        # otherwise take()/skip() would select different elements each epoch
        # and the partitions would overlap over time.
        # NOTE(review): this only holds if `ds` itself yields elements in a
        # stable order on every iteration -- confirm for the source dataset.
        ds = ds.shuffle(shuffle_size, seed=12, reshuffle_each_iteration=False)

    train_size = int(train_split * ds_size)
    val_size = int(val_split * ds_size)

    train_ds = ds.take(train_size)
    remainder = ds.skip(train_size)
    val_ds = remainder.take(val_size)
    test_ds = remainder.skip(val_size)

    return val_ds, test_ds, train_ds

In [None]:
# The dataset is batched, so its size is measured in batches, not images.
# Therefore the size passed to the split function is ceil(images / batch size).
# Ceiling division is used so a final, partial batch is counted too
# (assumes the loader keeps that last batch -- compare with len(animals_dataset)).
(26179 + BATCH_SIZE - 1) // BATCH_SIZE

In [None]:
# Number of elements (batches) the dataset itself reports.
len(animals_dataset)

In [None]:
# 60/20/20 split. The size comes from the dataset's own batch count rather
# than a hand-computed number, so it stays correct if the data or batch size
# changes. The function returns (validation, test, training) in that order.
validation_set, test_set, training_set = get_dataset_partitions_tf(
    animals_dataset,
    len(animals_dataset),
    train_split=0.6,
    val_split=0.2,
    test_split=0.2,
    shuffle=True,
    shuffle_size=10000,
)

In [None]:
# Per-class image counts in the training split.
countTuplesByClass(training_set, animals_names)

In [None]:
# Per-class image counts in the validation split.
countTuplesByClass(validation_set, animals_names)

In [None]:
# Per-class image counts in the test split.
countTuplesByClass(test_set, animals_names)

### One-Hot Encoding

In [None]:
# Quick check of tf.one_hot: encode class indices 0, 1 and 2 with one slot per class name.
tf.one_hot([0,1,2], len(animals_names))

In [None]:
def fixing_images(images,y):
    """Prepare one dataset element for training.

    Divides ``images`` by 255 and one-hot encodes the labels ``y`` with one
    slot per entry of ``animals_names``. Returns ``(scaled_images, one_hot_labels)``.
    """
    scaled_images = images / 255
    one_hot_labels = tf.one_hot(y, len(animals_names))
    return scaled_images, one_hot_labels

In [None]:
# Add a prefetch buffer of 128 elements to each of the three splits.
training_set, validation_set, test_set = (
    split.prefetch(128)
    for split in (training_set, validation_set, test_set)
)

In [None]:
# Apply the scaling / one-hot preprocessing to every training batch.
X_train = training_set.map(fixing_images)

In [None]:
# Apply the scaling / one-hot preprocessing to every validation batch.
X_valid = validation_set.map(fixing_images)

In [None]:
# Apply the scaling / one-hot preprocessing to every test batch.
X_test = test_set.map(fixing_images)

## Modelling
### Creating Model

In [None]:
# Combined L1/L2 weight penalty with the L1 term switched off (pure L2, factor 0.001).
regularizer = tf.keras.regularizers.l1_l2(l1=0, l2=0.001)

In [None]:
# Convolutional classifier: three 3x3 'same'-padded ReLU convolutions
# (32, 64, 128 filters) with max pooling after the first two, then two dense
# ReLU layers (500, 250) and a softmax output with one unit per class name.
# Every weighted layer uses the shared `regularizer`.
model = tf.keras.models.Sequential([
            # Input is (height, width, channels), matching the loader's
            # image_size=(IMG_HEIGHT, IMG_WIDTH) ordering.
            tf.keras.layers.Conv2D(32, input_shape = [IMG_HEIGHT, IMG_WIDTH, 3], kernel_size = 3, 
                                padding = 'same', kernel_regularizer=regularizer, activation=tf.keras.activations.relu),
            tf.keras.layers.MaxPool2D(),
            tf.keras.layers.Conv2D(64, kernel_size = 3,
                                padding = 'same', kernel_regularizer=regularizer, activation=tf.keras.activations.relu),
            tf.keras.layers.MaxPool2D(),
            tf.keras.layers.Conv2D(128, kernel_size = 3,
                                padding = 'same', kernel_regularizer=regularizer, activation=tf.keras.activations.relu),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(500, kernel_regularizer=regularizer, activation=tf.keras.activations.relu),
            tf.keras.layers.Dense(250, kernel_regularizer=regularizer, activation=tf.keras.activations.relu),
            # Linear output layer; the separate Softmax layer turns it into class probabilities.
            tf.keras.layers.Dense(len(animals_names), kernel_regularizer=regularizer),
            tf.keras.layers.Softmax()
])

model.summary()

In [None]:
# Stop training after 7 epochs without improvement in validation loss and
# roll the model back to its best weights.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=7,
    restore_best_weights=True,
)

In [None]:
# Clear the Keras session for a clean run and fix TensorFlow's random seed.
# Keras is reached through `tf.keras`: the visible cells only import
# TensorFlow as `tf`, so a bare `keras` name would raise a NameError.
tf.keras.backend.clear_session()
tf.random.set_seed(42)

In [None]:
# Adam optimizer with categorical cross-entropy loss (labels are one-hot
# encoded by fixing_images); accuracy is tracked as a metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train for at most 30 epochs, validating on X_valid; the early_stopping
# callback may end training sooner.
history = model.fit(
    X_train,
    epochs=30,
    validation_data=X_valid,
    callbacks=[early_stopping],
)

### Visualizing Training/Validation Results

In [None]:
import matplotlib.pyplot as plt

def training_plot(metrics, history):
    """Plot training and validation curves, one subplot per metric.

    Args:
        metrics: Sequence of metric names; each name and its ``'val_'``-prefixed
            counterpart must be keys of ``history.history``.
        history: Object with a ``history`` mapping from metric name to the
            sequence of values to plot (e.g. the return value of ``model.fit``).

    The training curve is drawn dashed and the validation curve solid.
    """
    # squeeze=False always returns a 2-D array of axes; taking its only row
    # gives a 1-D array, so ax[idx] also works when a single metric is passed.
    f, ax = plt.subplots(1, len(metrics), figsize=(5 * len(metrics), 5), squeeze=False)
    ax = ax[0]
    for idx, metric in enumerate(metrics):
        ax[idx].plot(history.history[metric], ls='dashed')
        ax[idx].set_xlabel("Epochs")
        ax[idx].set_ylabel(metric)
        ax[idx].plot(history.history['val_' + metric])
        ax[idx].legend([metric, 'val_' + metric])