# CNN Model Trainer

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Flatten, BatchNormalization
from tensorflow.keras.optimizers import Adamax
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from sklearn.metrics import classification_report, confusion_matrix


## Import dataset from Kaggle or Local, and Preparation

In [None]:
import kagglehub

# Locate the dataset and index it into a DataFrame with one row per image.
# The directory is expected to hold one sub-folder per class (F16, A10, C130,
# etc.); the sub-folder name is used as the label of every file inside it.
dataset_path = ""
using_kaggle_api = False

if using_kaggle_api:
    # Download latest version
    dataset_path = kagglehub.dataset_download("a2015003713/militaryaircraftdetectiondataset")
    # NOTE(review): the local path below points at a "crop" sub-folder, so the
    # per-class folders presumably live under "crop" in the download as well.
    # Use it when present; otherwise keep the download root as before.
    crop_dir = os.path.join(dataset_path, "crop")
    if os.path.isdir(crop_dir):
        dataset_path = crop_dir
else:
    dataset_path = "../data/archive_aircraft/crop"

print("Path to dataset files:", dataset_path)


filepaths = []
labels = []

# os.listdir() returns entries in arbitrary order; sorting keeps the row order
# of the DataFrame (and therefore any seeded split of it) reproducible.
folders = sorted(os.listdir(dataset_path))

# Get all files and label from folder names
for folder_name in folders:
    folder_path = os.path.join(dataset_path, folder_name)
    if not os.path.isdir(folder_path):
        # Stray files next to the class folders (e.g. .DS_Store, CSVs) are
        # not classes and cannot be listed.
        continue

    files = sorted(os.listdir(folder_path))
    print("Folder:", folder_name, "Number of files:", len(files))

    filepaths.extend(os.path.join(folder_path, f) for f in files)
    labels.extend([folder_name] * len(files))


dataset_dir = pd.DataFrame(data={"filepaths": filepaths, "labels": labels})

In [None]:
# Bare expression at the end of the cell: shows the filepaths/labels DataFrame
# as the cell output so the index can be inspected before splitting.
dataset_dir

In [None]:
# Hold out 20% of the indexed images for evaluation; the fixed seed keeps the
# split stable between runs.
train_df, test_df = train_test_split(dataset_dir, test_size=0.2, shuffle=True, random_state=42)

img_gen = ImageDataGenerator()
target_size = (224, 224)

# Both generators must agree on the label -> index mapping. Left to infer it,
# each generator only looks at its own frame, so a class that is missing from
# one split would shift the indices and change the one-hot width. Pin the
# mapping to the full label set instead.
class_names = sorted(dataset_dir["labels"].unique())

train_gen = img_gen.flow_from_dataframe(
    train_df,
    x_col="filepaths",
    y_col="labels",
    target_size=target_size,
    batch_size=32,
    class_mode="categorical",
    classes=class_names,
    color_mode="rgb",
)

test_gen = img_gen.flow_from_dataframe(
    test_df,
    x_col="filepaths",
    y_col="labels",
    target_size=target_size,
    batch_size=32,
    class_mode="categorical",
    classes=class_names,
    color_mode="rgb",
    # Evaluation metrics do not depend on batch order; keeping the rows in
    # order lets predictions be lined up with `test_gen.classes` afterwards.
    shuffle=False,
)

# Report the GPUs TensorFlow can see, then request memory growth on each one.
gpus = tf.config.experimental.list_physical_devices("GPU")
print("GPUs Available: ", gpus)

if gpus:
    print("Setting memory growth for all GPUs")

# With no GPU the loop body never runs, so it does not need to sit under the
# `if` above. Each device is handled on its own so that a RuntimeError on one
# GPU is reported without skipping the remaining ones.
for gpu in gpus:
    try:
        tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as err:
        print(err)

# Model Foundation

In [None]:
"""
https://keras.io/api/applications/
https://www.tensorflow.org/api_docs/python/tf/keras/applications
"""
base_keras_model = tf.keras.applications.EfficientNetV2S(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
    pooling='max',
)

model = Sequential([
    base_keras_model,
    BatchNormalization(),
    Dense(512, activation="relu", kernel_regularizer=tf.keras.regularizers.l2(.01)),
    Dropout(.2),
    # Dense(256, activation="relu"),
    # Dropout(.2),
    # Output layer
    Dense(74, activation="softmax"),
])


model.compile(
    optimizer=Adamax(learning_rate=.001),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

model.summary()

## >> **Training Model** <<

In [None]:
# Train for 20 epochs on the training generator, evaluating on the held-out
# generator after every epoch. The returned History object carries the
# per-epoch metrics that the plotting cell reads.
result = model.fit(
    train_gen,
    epochs=20,
    validation_data=test_gen,
    verbose=1,
)

## >== **Result Plotting** ==<


In [None]:
# Plot the per-epoch training/validation curves recorded by model.fit(), mark
# the best validation epoch on each plot, then save the trained model.
train_acc = result.history["accuracy"]
train_loss = result.history["loss"]

validation_acc = result.history["val_accuracy"]
validation_loss = result.history["val_loss"]

# Positions are 0-based list indices; epochs are reported 1-based below.
index_acc_highest = np.argmax(validation_acc)
acc_highest = validation_acc[index_acc_highest]

index_lowest_loss = np.argmin(validation_loss)
validation_lowest_loss = validation_loss[index_lowest_loss]

loss_label = f"best epoch = {index_lowest_loss + 1} with loss = {validation_lowest_loss}"
acc_label = f"best epoch = {index_acc_highest + 1} with acc = {acc_highest}"

EPOCHS = list(range(1, len(train_acc) + 1))


# The style sheet is named "fivethirtyeight" (no underscores); it has to be
# selected before the figure is created to take effect on it.
plt.style.use("fivethirtyeight")
plt.figure(figsize=(10, 5))

# Left panel: accuracy
plt.subplot(1, 2, 1)
plt.plot(EPOCHS, train_acc, "blue", label="Train Accuracy")
plt.plot(EPOCHS, validation_acc, "pink", label="Validation Accuracy")
plt.scatter(index_acc_highest + 1, acc_highest, color="red", label=acc_label)
plt.title("Train & Validation Accuracy")
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend()

# Right panel: loss
plt.subplot(1, 2, 2)
plt.plot(EPOCHS, train_loss, "blue", label="Train Loss")
plt.plot(EPOCHS, validation_loss, "pink", label="Validation Loss")
plt.scatter(index_lowest_loss + 1, validation_lowest_loss, color="red", label=loss_label)
plt.title("Train & Validation Loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()

plt.tight_layout()
plt.show()

# Save model
# `train_acc` is a list with one value per epoch and cannot be formatted with
# a float spec. Tag the file with the last epoch's training accuracy, which is
# the state of the weights being saved.
final_train_acc = train_acc[-1]
model.save(f"aircraft_model_{final_train_acc:.2f}.h5")