In [None]:
import os
import pathlib
from datetime import datetime
from typing import List, Tuple

import cv2
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras import applications
from tensorflow.keras.callbacks import (
    EarlyStopping,
    History,
    ModelCheckpoint,
    ReduceLROnPlateau,
    TensorBoard,
)
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

# Work from the parent of the directory the kernel was started in.
# The target is resolved only once per kernel session, so re-running this cell
# does not climb one more directory level each time it is executed.
if "PROJECT_ROOT" not in globals():
    PROJECT_ROOT = os.path.abspath("..")
os.chdir(PROJECT_ROOT)

In [None]:
# Dataset folder, given relative to the current working directory; it is
# passed to preprocess_images() as its data_directory argument.
DATA = "data/"

In [None]:
def preprocess_images_of_one_label(image_paths: List, label: int) -> Tuple[List, List]:
    """Load, resize and VGG16-preprocess the images of a single class.

    Args:
        image_paths: List of image paths.
        label: Label assigned to every image in ``image_paths``.

    Returns:
        Tuple ``(images, labels)``: the preprocessed images, in the order of
        ``image_paths``, and a list of the same length filled with ``label``.

    Raises:
        ValueError: If an image cannot be read by OpenCV.
    """
    arr = []

    for image_path in image_paths:
        img = cv2.imread(str(image_path))
        if img is None:
            # cv2.imread signals a missing or undecodable file by returning
            # None; fail here with the offending path instead of letting
            # cv2.resize raise an opaque error.
            raise ValueError(f"Could not read image: {image_path}")
        # OpenCV decodes to BGR, whereas vgg16.preprocess_input expects RGB
        # input (it performs the RGB -> BGR conversion itself).
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (224, 224))
        img = applications.vgg16.preprocess_input(img)
        arr.append(img)

    labels = [label] * len(arr)

    return arr, labels


In [None]:
def preprocess_images(data_directory: str) -> Tuple[np.ndarray, np.ndarray]:
    """Load and preprocess every image of the dataset.

    The directory is expected to hold one sub-folder per class (``email``,
    ``invoice``, ``letter``, ``resume``) containing ``*.tif`` files. The
    integer label of a class is its position in that list.

    Args:
        data_directory: Directory path.

    Returns:
        Tuple ``(images, labels)``: the preprocessed images stacked in one
        array and the matching one-hot encoded labels (4 classes).

    Raises:
        FileNotFoundError: If no ``*.tif`` image is found in any class folder.
    """
    class_names = ("email", "invoice", "letter", "resume")
    data_root = pathlib.Path(data_directory)

    images = []
    labels = []
    for label, class_name in enumerate(class_names):
        # glob() yields files in arbitrary order; sorting keeps the dataset
        # order, and therefore any seeded split, reproducible.
        class_paths = sorted(data_root.glob(f"{class_name}/*.tif"))
        class_images, class_labels = preprocess_images_of_one_label(class_paths, label)
        images += class_images
        labels += class_labels

    if not images:
        raise FileNotFoundError(f"No .tif images found under {data_root}")

    images = np.asarray(images)
    # One-hot encoding is what the categorical cross-entropy loss expects.
    labels = to_categorical(np.asarray(labels), num_classes=len(class_names))

    return images, labels

In [None]:
def _plot_history_metric(history: History, metric: str, title: str, ylabel: str) -> None:
    """Plot one training metric and its validation counterpart per epoch.

    Args:
        history: Model history; ``history.history`` must contain the keys
            ``metric`` and ``"val_" + metric``.
        metric: Key of the training curve in ``history.history``.
        title: Title of the figure.
        ylabel: Label of the y axis.
    """
    # A dedicated figure/axes pair keeps each plot independent of pyplot's
    # implicit "current figure" state.
    _, ax = plt.subplots()
    ax.plot(history.history[metric])
    ax.plot(history.history[f"val_{metric}"])
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xlabel("Epoch")
    ax.legend(["Train", "Validation"], loc="upper left")
    plt.show()


def plot_accuracy(history: History):
    """Plot accuracy during training.

    Args:
        history: Model history.
    """
    _plot_history_metric(history, "accuracy", "Model accuracy", "Accuracy")


def plot_loss(history: History):
    """Plot loss during training.

    Args:
        history: Model history.
    """
    _plot_history_metric(history, "loss", "Model loss", "Loss")


In [None]:
# Load the whole dataset into memory.
images, labels = preprocess_images(data_directory=DATA)

# Hold out 20% of the samples as the test set, stratified on the labels.
X_train, X_test, y_train, y_test = train_test_split(
    images,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)

# Carve a validation set (33% of the remaining samples) out of the training set.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.33,
    random_state=42,
    stratify=y_train,
)

In [None]:
# Convolutional base: VGG16 with ImageNet weights and without its dense head.
model = applications.vgg16.VGG16(weights="imagenet", include_top=False, input_shape=(224, 224, 3))

early_stopping = EarlyStopping(monitor="val_loss", patience=2, verbose=1, restore_best_weights=True)
# The LR scheduler monitors the same quantity as early stopping. Its patience
# must be smaller than early stopping's: with equal patience both callbacks
# fire at the end of the same epoch, so the reduced learning rate would never
# be used for a single training step.
reduce_lr = ReduceLROnPlateau(monitor="val_loss", factor=0.2, patience=1, min_lr=0.000001)

# Every layer of the convolutional base is trained together with the new head
# (no layer is frozen).
for layer in model.layers:
    layer.trainable = True

# Classification head: two dense layers with dropout in between, followed by a
# 4-way softmax.
x = model.output
x = Flatten()(x)
x = Dense(256, activation="relu")(x)
x = Dropout(0.5)(x)
x = Dense(128, activation="relu")(x)
predictions = Dense(4, activation="softmax")(x)

model_final = Model(inputs=model.input, outputs=predictions)

# categorical_crossentropy requires one-hot encoded targets.
model_final.compile(
    loss="categorical_crossentropy",
    optimizer=Adam(learning_rate=0.0001),
    metrics=["accuracy"],
)

In [None]:
# Train on the training split and evaluate on the validation split after each
# epoch; the callbacks adapt the learning rate and may stop training early.
history = model_final.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=32,
    epochs=5,
    callbacks=[reduce_lr, early_stopping],
)

In [None]:
# Show the accuracy curves first, then the loss curves.
for plot_curve in (plot_accuracy, plot_loss):
    plot_curve(history)