In [1]:
import os
from math import ceil
import numpy as np
import tensorflow as tf
from tensorflow.keras import models, layers
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report

In [None]:
ROOT = "Oral Cancer Prediction"
DATA_PATH = os.path.join(ROOT, "assets", "dataset")

IMG_SIZE = (224, 224)

BATCH_SIZE = 32

In [None]:
ds = image_dataset_from_directory(
    DATA_PATH, image_size=IMG_SIZE, batch_size=BATCH_SIZE, shuffle=True, seed=123
)

Found 719 files belonging to 2 classes.


In [None]:
CLASS_NAMES = ds.class_names
print(CLASS_NAMES)

class_counts = {
    0: os.listdir(os.path.join(DATA_PATH, CLASS_NAMES[0])).__len__(),
    1: os.listdir(os.path.join(DATA_PATH, CLASS_NAMES[1])).__len__(),
}
print(class_counts)

['cancer', 'normal']
{0: 188, 1: 531}


# Bulding the model


In [None]:
def report(model_name):
    # Evaluate model on training, validation, and testing datasets
    train_eval = model_name.evaluate(train_ds)
    val_eval = model_name.evaluate(val_ds)
    test_eval = model_name.evaluate(test_ds)

    print(
        f"[train] acc: {train_eval[1]:.4f} - auc: {train_eval[2]:.4f} - loss: {train_eval[0]:.4f}"
    )
    print(
        f"[val]   acc: {val_eval[1]:.4f} - auc: {val_eval[2]:.4f} - loss: {val_eval[0]:.4f}"
    )
    print(
        f"[test]  acc: {test_eval[1]:.4f} - auc: {test_eval[2]:.4f} - loss: {test_eval[0]:.4f}\n"
    )

    # Get True and Predicted results
    y_true = np.concatenate([y.numpy() for x, y in val_ds], axis=0)
    y_pred_probs = model_name.predict(val_ds)
    y_pred = np.argmax(y_pred_probs, axis=1)

    # Generate Classification report
    print(classification_report(y_true, y_pred, target_names=CLASS_NAMES, digits=4))

In [None]:
def train(
    model,
    train_ds,
    val_ds,
    opti="adam",
    log_monitor="val_auc",
    md="max",
    patience=5,
    batch_size=BATCH_SIZE,
    epochs=100,
):
    tf.keras.backend.clear_session()
    # Compile
    model.compile(
        optimizer=opti,
        loss="binary_crossentropy",
        metrics=["accuracy", tf.keras.metrics.AUC()],
    )

    early_stop = tf.keras.callbacks.EarlyStopping(
        monitor=log_monitor, patience=patience, mode=md, restore_best_weights=True
    )

    # Train
    history = model.fit(
        train_ds,
        validation_data=val_ds,
        batch_size=batch_size,
        epochs=epochs,
        callbacks=[early_stop],
    )

## Agumenting dataset


In [None]:
def dataset_partitions(
    ds, train_split=0.8, val_split=0.1, shuffle=True, shuffle_size=1000
):
    if shuffle:
        ds.shuffle(shuffle_size)

    ds_size = int(ds.reduce(0, lambda x, _: x + 1).numpy())
    train_size = int(ds_size * train_split)
    val_size = ceil(ds_size * val_split)

    train_ds = ds.take(train_size)
    val_ds = ds.skip(train_size).take(val_size)
    test_ds = ds.skip(train_size).skip(val_size)

    print(f"Number of batches  : {ds_size}")
    print(f"Training batches   : {train_size}")
    print(f"Validating batches : {val_size}")
    print(f"Testing batches    : {ds_size - train_size - val_size}")

    return train_ds, val_ds, test_ds

In [8]:
train_ds, val_ds, test_ds = dataset_partitions(ds)

train_ds = train_ds.cache().prefetch(buffer_size=tf.data.AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=tf.data.AUTOTUNE)
test_ds = test_ds.cache().prefetch(buffer_size=tf.data.AUTOTUNE)

Number of batches  : 23
Training batches   : 18
Validating batches : 3
Testing batches    : 2


In [None]:
data_augmentation = tf.keras.Sequential(
    [
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(0.2),
        layers.RandomZoom(0.1),
    ]
)

## Model design


In [None]:
effnetB0_base_model = EfficientNetB0(weights="imagenet", include_top=False)
effnetB0_base_model.trainable = False

inputs = tf.keras.Input(shape=(*IMG_SIZE, 3))
x = data_augmentation(inputs)
x = layers.Rescaling(1.0 / 255)(x)
x = effnetB0_base_model(inputs, training=False)
x = layers.GlobalAveragePooling2D()(x)
outputs = layers.Dense(1, activation="sigmoid")(x)
model_effnetB0 = tf.keras.Model(inputs, outputs)

model_effnetB0.summary()

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [12]:
train(model_effnetB0, train_ds, val_ds, epochs=50)

Epoch 1/50
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m106s[0m 5s/step - accuracy: 0.6013 - auc: 0.5588 - loss: 0.6432 - val_accuracy: 0.7500 - val_auc: 0.7205 - val_loss: 0.5113
Epoch 2/50
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 3s/step - accuracy: 0.7725 - auc: 0.7825 - loss: 0.4892 - val_accuracy: 0.8229 - val_auc: 0.8756 - val_loss: 0.4126
Epoch 3/50
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 3s/step - accuracy: 0.8409 - auc: 0.8699 - loss: 0.4163 - val_accuracy: 0.8229 - val_auc: 0.8979 - val_loss: 0.3713
Epoch 4/50
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 3s/step - accuracy: 0.8392 - auc: 0.8746 - loss: 0.3921 - val_accuracy: 0.8125 - val_auc: 0.9117 - val_loss: 0.3413
Epoch 5/50
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 3s/step - accuracy: 0.8530 - auc: 0.8981 - loss: 0.3669 - val_accuracy: 0.8542 - val_auc: 0.9251 - val_loss: 0.3195
Epoch 6/50
[1m18/18[0m [32m━━━━━━━━━

In [13]:
report(model_effnetB0)

[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 3s/step - accuracy: 0.9632 - auc: 0.9912 - loss: 0.1418
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3s/step - accuracy: 0.9505 - auc: 0.9718 - loss: 0.1666
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 992ms/step - accuracy: 0.9612 - auc: 0.9931 - loss: 0.1591
[train] acc: 0.9688 - auc: 0.9923 - loss: 0.1315
[val]   acc: 0.9479 - auc: 0.9800 - loss: 0.1568
[test]  acc: 0.9574 - auc: 0.9896 - loss: 0.1741

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 3s/step
              precision    recall  f1-score   support

      cancer     0.2500    1.0000    0.4000        24
      normal     0.0000    0.0000    0.0000        72

    accuracy                         0.2500        96
   macro avg     0.1250    0.5000    0.2000        96
weighted avg     0.0625    0.2500    0.1000        96



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
model_effnetB0.save("/content/drive/MyDrive/Colab Notebooks/my_model.keras")