## AlexNet Model Training Process

### Import libraries

In [None]:
import numpy as np
import keras
import tensorflow as tf
from matplotlib import pyplot as plt
from time import time
import warnings
from sklearn.metrics import classification_report, confusion_matrix
import itertools
import os
import seaborn as sns
import matplotlib.pyplot as plt
# Silence every Python warning for the rest of the session (this also hides deprecation notices).
warnings.filterwarnings("ignore")
# IPython magic: draw matplotlib figures inside the notebook's output cells.
%matplotlib inline

### Dataset paths

In [None]:
# Root folders of the three image splits; fill these in before running the notebook.
# Each folder is read with flow_from_directory, i.e. one sub-folder per class is expected.
train_path = ''  # training images; its entries also provide the class names
valid_path = ''  # validation images
test_path = ''  # test images

### Training classes

In [None]:
# Class names are the sub-directories of the training folder.
# - Stray files (e.g. .DS_Store, Thumbs.db) are skipped so they do not turn into
#   empty classes and inflate the size of the softmax layer.
# - The names are sorted because os.listdir() returns entries in arbitrary order;
#   a stable order keeps the class -> index mapping reproducible between runs.
train_classes = sorted(
    entry for entry in os.listdir(train_path)
    if os.path.isdir(os.path.join(train_path, entry))
)

### Training batch sizes

In [None]:
# Every split is fed to the network in mini-batches of the same size.
train_batch_size = test_batch_size = valid_batch_size = 32

### Training samples

In [None]:
# Number of images in each split (hard-coded); used to derive the steps per epoch.
train_n, test_n, valid_n = 10080, 2520, 2520

### Training steps (per epoch)

In [None]:
# Number of batches needed to visit every sample of a split once.
# Ceiling division is used on purpose: floor division drops the last, partially
# filled batch (2520 // 32 = 78 batches = 2496 images), which yields fewer
# predictions than there are labels when the confusion matrix is built.
train_steps = (train_n + train_batch_size - 1) // train_batch_size
valid_steps = (valid_n + valid_batch_size - 1) // valid_batch_size
test_steps = (test_n + test_batch_size - 1) // test_batch_size

### Epochs

In [None]:
# Upper bound on the number of training epochs; the EarlyStopping callback that is
# passed to training can end the run sooner.
epochs = 1000

### Generate batches

In [None]:
# Directory iterators for the three splits. Images are resized to the 227x227 input
# of the network and labelled by sub-directory, using the class order of train_classes
# for every split. No augmentation or rescaling is configured.
train_batches = tf.keras.preprocessing.image.ImageDataGenerator().flow_from_directory(
    train_path,
    target_size=(227, 227),
    classes=train_classes,
    batch_size=train_batch_size,
)
valid_batches = tf.keras.preprocessing.image.ImageDataGenerator().flow_from_directory(
    valid_path,
    target_size=(227, 227),
    classes=train_classes,
    batch_size=valid_batch_size,
)
# shuffle=False: the test predictions are later compared position by position with
# test_batches.classes, which is in directory order. The default (shuffle=True) would
# scramble the batch order and make that comparison meaningless.
test_batches = tf.keras.preprocessing.image.ImageDataGenerator().flow_from_directory(
    test_path,
    target_size=(227, 227),
    classes=train_classes,
    batch_size=test_batch_size,
    shuffle=False,
)

### Base model (AlexNet)

#### Training from scratch

In [None]:
# AlexNet-style CNN trained from scratch: five convolutions (each followed by batch
# normalisation), three overlapping 3x3/stride-2 max-pools, then two 4096-unit dense
# layers with dropout and a softmax over the training classes.
new_model = tf.keras.models.Sequential()

# Stage 1: 227x227x3 -> 55x55x96 (11x11 conv, stride 4, no padding) -> 27x27x96 after pooling.
new_model.add(tf.keras.layers.Conv2D(96, (11, 11), strides=(4, 4), activation='relu',
                                     input_shape=(227, 227, 3)))
new_model.add(tf.keras.layers.BatchNormalization())
new_model.add(tf.keras.layers.MaxPool2D(pool_size=(3, 3), strides=(2, 2)))

# Stage 2: 5x5 'same' conv keeps 27x27, pooling reduces it to 13x13x256.
new_model.add(tf.keras.layers.Conv2D(256, (5, 5), strides=(1, 1), padding="same", activation='relu'))
new_model.add(tf.keras.layers.BatchNormalization())
new_model.add(tf.keras.layers.MaxPool2D(pool_size=(3, 3), strides=(2, 2)))

# Stage 3: three 3x3 'same' convolutions at 13x13 (384, 384, 256 filters), then pooling to 6x6x256.
new_model.add(tf.keras.layers.Conv2D(384, (3, 3), strides=(1, 1), padding="same", activation='relu'))
new_model.add(tf.keras.layers.BatchNormalization())
new_model.add(tf.keras.layers.Conv2D(384, (3, 3), strides=(1, 1), padding="same", activation='relu'))
new_model.add(tf.keras.layers.BatchNormalization())
new_model.add(tf.keras.layers.Conv2D(256, (3, 3), strides=(1, 1), padding="same", activation='relu'))
new_model.add(tf.keras.layers.BatchNormalization())
new_model.add(tf.keras.layers.MaxPool2D(pool_size=(3, 3), strides=(2, 2)))

# Classifier head: flatten (6*6*256 = 9216 features), two dense layers with 50% dropout,
# and one softmax output per training class.
new_model.add(tf.keras.layers.Flatten())
new_model.add(tf.keras.layers.Dense(4096, activation='relu'))
new_model.add(tf.keras.layers.Dropout(0.5))
new_model.add(tf.keras.layers.Dense(4096, activation='relu'))
new_model.add(tf.keras.layers.Dropout(0.5))
new_model.add(tf.keras.layers.Dense(len(train_classes), activation='softmax'))

### Set early stopping
Stops training once the monitored metric stops improving, which helps to avoid overfitting.

In [None]:
# Stop training when the validation loss has not improved for 20 consecutive epochs
# and restore the weights of the best epoch.
# The validation loss is monitored instead of the training accuracy for two reasons:
# - it is always logged as 'val_loss' when validation data is supplied, whereas the
#   name of the accuracy entry depends on how the metric was spelled at compile time
#   (a monitor that is not logged makes early stopping silently inactive);
# - a validation score that stops improving is what signals overfitting; the training
#   accuracy keeps rising while the model overfits.
es_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=20,
    restore_best_weights=True,
    verbose=1,
)

### Load tensorboard

In [None]:
# Load the TensorBoard notebook extension
%load_ext tensorboard
import datetime
log_dir = "logs\\" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
# !rm -rf logs/

In [None]:
# TensorBoard logging callback: writes to log_dir and records histograms every epoch
# (histogram_freq=1).
tb_callback = tf.keras.callbacks.TensorBoard(
    histogram_freq=1,
    log_dir=log_dir,
)

### Compile the model

In [None]:
# Configure training: Adam optimiser and categorical cross-entropy computed on the
# softmax probabilities (from_logits=False).
# - `learning_rate` replaces the deprecated `lr` alias of the optimiser.
# - The accuracy metric is requested as 'acc' so that it is logged under the short keys
#   'acc' / 'val_acc' used elsewhere in this notebook; the long spelling 'accuracy' is
#   logged as 'accuracy' / 'val_accuracy' by TF 2.
# - label_smoothing, reduction and name were only spelled out with their default
#   values and are therefore omitted.
new_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics=['acc'],
)

### Training

In [None]:
# Train the network. Model.fit accepts generators directly; fit_generator is deprecated.
# Each epoch runs train_steps training batches and valid_steps validation batches;
# the callbacks handle TensorBoard logging and early stopping.
# The returned History object holds the per-epoch metrics that are plotted afterwards.
history = new_model.fit(
    train_batches,
    steps_per_epoch=train_steps,
    epochs=epochs,
    validation_data=valid_batches,
    validation_steps=valid_steps,
    callbacks=[tb_callback, es_callback],
)

### Evaluate training process

In [None]:
# Loss followed by the compiled metrics of the trained model on the training data.
# Model.evaluate accepts generators directly; evaluate_generator is deprecated.
# verbose=0 keeps the call as quiet as evaluate_generator is by default.
results = new_model.evaluate(train_batches, steps=train_steps, verbose=0)
results

#### Loss function

In [None]:
# Training vs. validation loss per epoch, drawn on a logarithmic y-axis.
for history_key, curve_label in (('loss', 'Train'), ('val_loss', 'Validation')):
    plt.semilogy(history.epoch, history.history[history_key], label=curve_label)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

#### Accuracy function

In [None]:
# Training vs. validation accuracy per epoch, drawn on a logarithmic y-axis.
# The history key depends on how the metric was spelled at compile time: 'acc' for
# metrics=['acc'], 'accuracy' for metrics=['accuracy'] (TF 2). Pick whichever was
# logged instead of failing with a KeyError.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
plt.semilogy(history.epoch, history.history[acc_key], label='Train')
plt.semilogy(history.epoch, history.history['val_' + acc_key], label='Validation')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

In [None]:
# Class probabilities for every training image, one row per image.
# A separate generator with shuffle=False is used: train_batches shuffles its images,
# so its predictions would not line up with train_batches.classes (directory order)
# when the confusion matrix is built. The new generator reads the same folder with the
# same class list, hence the same directory order.
# `steps` is left unset so that predict() runs over all batches of the generator.
# Model.predict accepts generators directly; predict_generator is deprecated.
train_eval_batches = tf.keras.preprocessing.image.ImageDataGenerator().flow_from_directory(
    train_path,
    target_size=(227, 227),
    classes=train_classes,
    batch_size=train_batch_size,
    shuffle=False,
)
Y_train_pred = new_model.predict(train_eval_batches, verbose=0)

In [None]:
# Predicted class index per image: position of the largest probability in each row.
y_train_pred = Y_train_pred.argmax(axis=1)

#### Confusion matrix

In [None]:
# Training confusion matrix: true class indices (train_batches.classes) against the
# predicted class indices.
cm_train = confusion_matrix(y_true=train_batches.classes, y_pred=y_train_pred)

In [None]:
# Annotated heat map of the training confusion matrix (cells show integer counts).
fig_train, ax_train = plt.subplots(figsize=(15, 15))
sns.heatmap(cm_train, annot=True, fmt="d", ax=ax_train)
ax_train.set_title('Confusion matrix (Train)')
ax_train.set_ylabel('Actual label')
ax_train.set_xlabel('Predicted label')

### Model testing

In [None]:
# Class probabilities for every test image, one row per image.
# `steps` is left unset so that predict() runs over all batches of the generator,
# including the last, partially filled one. A step count obtained by floor division
# (2520 // 32 = 78 batches = 2496 images) skips that batch and yields fewer
# predictions than there are labels in test_batches.classes.
# Model.predict accepts generators directly; predict_generator is deprecated.
# NOTE: the rows only line up with test_batches.classes if test_batches does not shuffle.
Y_pred = new_model.predict(test_batches, verbose=0)

In [None]:
# Predicted class index per test image: position of the largest probability in each row.
y_pred = Y_pred.argmax(axis=1)

In [None]:
# Test confusion matrix: true class indices (test_batches.classes) against the
# predicted class indices.
cm = confusion_matrix(y_true=test_batches.classes, y_pred=y_pred)

In [None]:
# Annotated heat map of the test confusion matrix (cells show integer counts).
fig_test, ax_test = plt.subplots(figsize=(15, 15))
sns.heatmap(cm, annot=True, fmt="d", ax=ax_test)
ax_test.set_title('Confusion matrix')
ax_test.set_ylabel('Actual label')
ax_test.set_xlabel('Predicted label')

### Launch TensorBoard

In [None]:
# IPython magic: open TensorBoard inside the notebook, reading the runs stored under ./logs.
%tensorboard --logdir logs

### Save model

In [None]:
# Persist the trained model to 'model_name.h5' in the current working directory
# (the .h5 suffix makes Keras write a single HDF5 file).
new_model.save('model_name.h5')