In [1]:
from dataclasses import dataclass
import pandas as pd
import os
import SimpleITK as sitk

import keras as tfk
from keras import layers as tfkl

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight

import tensorflow as tf
from tensorflow.keras.regularizers import l2
from tensorflow.data import Dataset as tfds
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

sns.set_theme()

In [7]:
# --- Reproducibility / batching -------------------------------------------
SEED = 42
BATCH_SIZE = 64

# --- Input resolution fed to the network (square images) -------------------
IMG_HEIGHT = IMG_WIDTH = 224

# Metric names kept for reference; NOTE(review): not referenced by the cells
# visible in this notebook section.
METRICS = ["accuracy", "recall", "f1_score"]

# Root folder that contains the `train/` and `val/` sub-directories.
data_dir = '/kaggle/input/lung-ds-preproc/Full_slice'

# Seed Keras' global random state so runs are repeatable.
tfk.utils.set_random_seed(SEED)

In [25]:
@dataclass
class Hyperparameters:
    """Bundle of model and training settings used by `build_model` and `fit`.

    Every attribute is type-annotated so that `@dataclass` registers it as a
    real field. Without annotations the decorator generates an `__init__` that
    accepts no arguments, so individual values could not be overridden at
    construction time (e.g. `Hyperparameters(epochs=2)`), and the generated
    `__repr__`/`__eq__` would ignore every setting.
    """

    # Model
    activation: str = "silu"
    # Training
    # Optimiser *class* (not an instance); it is instantiated in `fit` with
    # the learning rate and weight decay.
    optimiser: type = tfk.optimizers.Adam
    learning_rate: float = 1e-4
    # The default loss object is created once and shared by all instances
    # that do not override it.
    loss: tfk.losses.Loss = tfk.losses.CategoricalCrossentropy()
    epochs: int = 25
    # Early stopping parameters
    es_patience: int = 15
    es_min_delta: float = 1e-2
    # Learning rate schedule
    lr_patience: int = 5
    lr_decay_factor: float = 0.3
    lr_min_delta: float = 1e-2
    min_lr: float = 1e-6


hp = Hyperparameters()

In [9]:
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input


# Training pipeline: MobileNetV2 preprocessing plus light geometric
# augmentation.
# NOTE(review): horizontal_flip was left commented out in this cell —
# presumably because mirrored slices are not anatomically plausible; confirm.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=10,
    shear_range=0.15,
    zoom_range=0.15,
)

# Validation pipeline: preprocessing only, no augmentation.
val_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input
)

train_generator = train_datagen.flow_from_directory(
    data_dir + "/train",
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    seed=SEED,  # reuse the notebook-wide seed instead of a second literal 42
)

val_generator = val_datagen.flow_from_directory(
    data_dir + "/val",
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    # Keep a fixed order so predictions line up with `val_generator.classes`
    # when computing per-sample metrics; aggregate loss/accuracy from
    # `evaluate` do not depend on the order.
    shuffle=False,
    seed=SEED,
)

Found 1890 images belonging to 5 classes.
Found 473 images belonging to 5 classes.


In [10]:
# Mapping {class name -> integer index} discovered from the training folders.
class_indices = train_generator.class_indices
num_classes = len(class_indices)

# Integer class index of every training image.
labels = train_generator.classes

# "balanced" weights are inversely proportional to class frequency.
present_classes = np.unique(labels)
class_weights = compute_class_weight(
    class_weight="balanced",
    classes=present_classes,
    y=labels
)

# Key each weight by the class id it was computed for (not by its position in
# the array): the two only coincide when every class index 0..N-1 occurs in
# `labels`. Cast to plain Python int/float so the dict holds no NumPy scalars.
class_weight_dict = {
    int(class_id): float(weight)
    for class_id, weight in zip(present_classes, class_weights)
}

In [14]:
def fit(model: tfk.Model,
        train_generator,
        hp: Hyperparameters,
        class_weights,
        weight_decay,
        val_generator=None):
    """Compile `model` and train it on `train_generator`.

    Args:
        model: Keras model to compile and train (modified in place).
        train_generator: Batch iterator passed to `model.fit` (in this
            notebook, the object returned by `flow_from_directory`).
        hp: Hyperparameters supplying the loss, optimiser class, learning
            rate, number of epochs and the early-stopping / LR-schedule
            settings.
        class_weights: Dict mapping class index to loss weight, forwarded to
            `model.fit(class_weight=...)`.
        weight_decay: Weight-decay value passed to the optimiser constructor.
        val_generator: Optional validation iterator. When given, it is used
            as `validation_data` and the early-stopping and
            reduce-on-plateau callbacks configured in `hp` are attached
            (both monitor `val_loss`). When omitted, training runs for
            exactly `hp.epochs` epochs with no callbacks.

    Returns:
        Tuple `(model, history)` where `history` is the `History.history`
        dict returned by `model.fit`.
    """
    model.compile(loss=hp.loss,
                  optimizer=hp.optimiser(learning_rate=hp.learning_rate,
                                         weight_decay=weight_decay),
                  metrics=['accuracy'])

    # The callbacks monitor `val_loss`, which only exists when validation
    # data is supplied, so they are attached only in that case.
    callbacks = []
    if val_generator is not None:
        callbacks = [
            tfk.callbacks.EarlyStopping(
                monitor="val_loss",
                patience=hp.es_patience,
                min_delta=hp.es_min_delta,
            ),
            tfk.callbacks.ReduceLROnPlateau(
                monitor="val_loss",
                factor=hp.lr_decay_factor,
                patience=hp.lr_patience,
                min_delta=hp.lr_min_delta,
                min_lr=hp.min_lr,
            ),
        ]

    history = model.fit(
        train_generator,
        validation_data=val_generator,
        epochs=hp.epochs,
        class_weight=class_weights,
        callbacks=callbacks,
    ).history

    return model, history

In [15]:
def build_model(hp: Hyperparameters,
                feature_extractor: tfk.Model,
                num_classes: int = 5):
    """Stack a dense classification head on top of `feature_extractor`.

    Args:
        hp: Hyperparameters; `hp.activation` is used for the hidden Dense
            layers.
        feature_extractor: Callable Keras model applied to the image input.
            The Dense head is applied directly to its output, so it is
            expected to return one feature vector per image (the notebook
            passes MobileNetV2 built with `pooling='avg'`).
        num_classes: Number of softmax output units. Defaults to 5, the value
            that was previously hard-coded.

    Returns:
        An uncompiled `tfk.Model` mapping
        `(IMG_HEIGHT, IMG_WIDTH, 3)` images to class probabilities.
    """
    inputs = tfkl.Input((IMG_HEIGHT, IMG_WIDTH, 3))

    features = feature_extractor(inputs)

    x = tfkl.Dropout(0.4)(features)

    # Funnel-shaped MLP head; the activation comes from the hyperparameters
    # rather than a literal so that changing `hp.activation` takes effect.
    for units in (1024, 512, 256, 128):
        x = tfkl.Dense(units, activation=hp.activation)(x)

    x = tfkl.Dropout(0.3)(x)

    out = tfkl.Dense(num_classes, activation='softmax')(x)

    return tfk.Model(inputs, out)

In [24]:
# Validation metrics collected per weight-decay candidate, in the same order
# as `wd_candidates`.
VALIDATION_ACCURACY = []
VALIDATION_LOSS = []

wd_candidates = [1, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5]

for wdc in wd_candidates:

    print("Training: " + str(wdc))

    # Fresh ImageNet-pretrained backbone for every candidate so runs do not
    # share trained weights.
    backbone = tfk.applications.MobileNetV2(
        include_top=False,
        weights='imagenet',
        input_shape=(IMG_HEIGHT, IMG_WIDTH, 3),
        pooling='avg'
    )

    model = build_model(hp, backbone)

    model, history = fit(model,
                         train_generator,
                         hp,
                         class_weight_dict,
                         wdc)

    print("Testing: " + str(wdc))

    # Ask for a dict keyed by metric name instead of zipping with
    # `model.metrics_names`, which exposes all compiled metrics under the
    # single internal key 'compile_metrics' and breaks as soon as more than
    # one metric is compiled.
    results = model.evaluate(val_generator, return_dict=True)

    VALIDATION_ACCURACY.append(results['accuracy'])
    VALIDATION_LOSS.append(results['loss'])

    # Clear the session of the same Keras package (`tfk`) that built the
    # model, so state from this candidate does not accumulate across the loop.
    tfk.backend.clear_session()

Training: 1
Epoch 1/2
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 1s/step - accuracy: 0.3111 - loss: 1.5867
Epoch 2/2
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 725ms/step - accuracy: 0.2345 - loss: 1.5405
Testing: 1
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 490ms/step - accuracy: 0.3427 - loss: 1.5131
Training: 0.1
Epoch 1/2


KeyboardInterrupt: 