In [2]:
from dataclasses import dataclass
import pandas as pd
import os
import SimpleITK as sitk

import keras as tfk
from keras import layers as tfkl

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight

import tensorflow as tf
from tensorflow.keras.regularizers import l2
from tensorflow.data import Dataset as tfds
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

sns.set_theme()

In [3]:
# --- Reproducibility ---------------------------------------------------------
SEED = 42

# --- Input pipeline ----------------------------------------------------------
BATCH_SIZE = 64
IMG_HEIGHT, IMG_WIDTH = 224, 224  # spatial size passed as `target_size` to the generators

# Metric names kept together as a module-level constant.
METRICS = ["accuracy", "recall", "f1_score"]

# Dataset root; the "/train" and "/val" sub-folders are appended where the
# generators are created.
data_dir = '/kaggle/input/lung-ds/Full_slice'

# Seed the random number generators through Keras' single-call helper.
tfk.utils.set_random_seed(SEED)

In [8]:
# Preprocessing function of the MobileNetV2 family, matching the backbone that
# is instantiated in the training cell.
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input

# Training pipeline: backbone preprocessing plus light geometric augmentation
# (rotation, shear, zoom). Shifts and flips are not enabled.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=10,
    shear_range=0.15,
    zoom_range=0.15,
    fill_mode='nearest',
)

# Validation pipeline: preprocessing only, no augmentation.
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Training batches are shuffled (the generator default); the shuffle order is
# driven by the notebook-wide SEED instead of a second hard-coded literal.
train_generator = train_datagen.flow_from_directory(
    data_dir + "/train",
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    seed=SEED,
)

# Validation batches are served in a fixed order (shuffle=False) so that the
# order of predictions matches `val_generator.classes` and repeated passes over
# the generator see the samples in the same sequence.
val_generator = val_datagen.flow_from_directory(
    data_dir + "/val",
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False,
    seed=SEED,
)

Found 1890 images belonging to 5 classes.
Found 473 images belonging to 5 classes.


In [9]:
# Mapping of class name -> integer label as assigned by the training generator.
class_indices = train_generator.class_indices
num_classes = len(class_indices)

# Integer label of every training image.
labels = train_generator.classes

# "balanced" weighting gives under-represented classes a larger weight.
present_classes = np.unique(labels)
class_weights = compute_class_weight(
    class_weight="balanced",
    classes=present_classes,
    y=labels,
)

# Key each weight by the label it was computed for. Pairing by position
# (enumerate) would silently shift the weights if some class index had no
# training samples. Plain int/float keep the mapping independent of NumPy
# scalar types.
class_weight_dict = {
    int(label): float(weight)
    for label, weight in zip(present_classes, class_weights)
}

class_weight_dict

{0: 1.9285714285714286,
 1: 1.052924791086351,
 2: 0.42615558060879366,
 3: 1.148936170212766,
 4: 3.176470588235294}

In [10]:
@dataclass
class Hyperparameters:
    """Container for the model and training hyperparameters of this notebook.

    Every attribute carries a type annotation so that ``@dataclass`` registers
    it as a real field: each value can be overridden through the constructor
    (e.g. ``Hyperparameters(learning_rate=1e-3)``) and shows up in ``repr`` and
    equality comparisons. Without an annotation an attribute is only a plain
    class attribute and is ignored by the generated ``__init__``.

    NOTE: the Keras objects used as defaults (regularizers, loss) are created
    once when the class is defined and are therefore shared by all instances
    that do not override them.
    """
    # Model
    activation: str = "silu"
    # Training
    noise_std: float = 0.05 # then try 0.1
    optimiser: type = tfk.optimizers.Adam  # optimizer class, instantiated at compile time
    learning_rate: float = 1e-4
    weight_decay: float = 1e-4
    regularization1: tfk.regularizers.Regularizer = tfk.regularizers.L1L2(l1=1e-5, l2=0)
    regularization2: tfk.regularizers.Regularizer = tfk.regularizers.L1L2(l1=0, l2=0)
    loss: tfk.losses.Loss = tfk.losses.CategoricalCrossentropy()
    epochs: int = 50
    ## Early stopping parameters
    es_patience: int = 15
    es_min_delta: float = 1e-2
    ## Learning rate schedule
    lr_patience: int = 5
    lr_decay_factor: float = 0.3
    lr_min_delta: float = 1e-2
    min_lr: float = 1e-6

hp = Hyperparameters()

In [11]:
def build_model(hp: Hyperparameters,
                feature_extractor: tfk.Model,
                num_classes: int = 5):
    """Attach a small classification head to a convolutional feature extractor.

    The head is: global average pooling -> Dense(128, relu) -> Dropout(0.4)
    -> Dense(num_classes, softmax).

    Args:
        hp: Hyperparameter container. Accepted for interface compatibility;
            the head built here does not read any value from it.
        feature_extractor: Keras model whose ``input`` and ``output`` tensors
            are reused. Its output is fed to ``GlobalAveragePooling2D``, so it
            is expected to be a 4D feature map (e.g. an application model
            created with ``include_top=False``).
        num_classes: Number of units of the softmax output layer. Defaults to
            5, the value that was previously hard-coded.

    Returns:
        A ``tfk.Model`` mapping the feature extractor's input to class
        probabilities.

    Raises:
        ValueError: If ``num_classes`` is smaller than 1.
    """
    if num_classes < 1:
        raise ValueError(f"num_classes must be >= 1, got {num_classes}")

    backbone_output = feature_extractor.output

    x = tfkl.GlobalAveragePooling2D()(backbone_output)
    x = tfkl.Dense(128, activation='relu')(x)
    x = tfkl.Dropout(0.4)(x)

    out = tfkl.Dense(num_classes, activation='softmax')(x)

    model = tfk.Model(inputs=feature_extractor.input, outputs=out)
    return model

In [12]:
def fit(model: tfk.Model,
        train_generator,
        val_generator,
        hp: Hyperparameters,
        class_weights = None):
    """Compile ``model`` and train it with early stopping and LR scheduling.

    Args:
        model: Keras model to compile and train (modified in place).
        train_generator: Batch source passed to ``model.fit`` as training data
            (in this notebook, the iterator returned by
            ``flow_from_directory``).
        val_generator: Batch source passed as ``validation_data``.
        hp: Hyperparameters supplying the loss, optimizer class, learning
            rate, number of epochs and the callback settings.
        class_weights: Optional mapping of class index -> weight forwarded to
            ``model.fit(class_weight=...)``.

    Returns:
        Tuple ``(model, history)`` where ``history`` is the ``History.history``
        dict of per-epoch metric lists.
    """
    model.compile(loss=hp.loss,
                  optimizer=hp.optimiser(learning_rate=hp.learning_rate),
                  metrics=['accuracy'])

    callbacks = [
        # Stop once val_loss has not improved by at least `es_min_delta` for
        # `es_patience` epochs, then roll back to the best weights seen.
        # `es_min_delta` was previously declared on `hp` but never applied.
        tfk.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=hp.es_patience,
            min_delta=hp.es_min_delta,
            restore_best_weights=True,
        ),
        # Multiply the learning rate by `lr_decay_factor` when val_loss
        # plateaus, never going below `min_lr`.
        tfk.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            factor=hp.lr_decay_factor,
            patience=hp.lr_patience,
            min_delta=hp.lr_min_delta,
            min_lr=hp.min_lr,
            verbose=1,
        ),
    ]

    history = model.fit(
        train_generator,
        validation_data=val_generator,
        epochs=hp.epochs,
        class_weight=class_weights,
        callbacks=callbacks,
    ).history

    return model, history

In [21]:
# ImageNet-pretrained MobileNetV2 without its classification top.
backbone = tfk.applications.MobileNetV2(
    input_shape=(IMG_HEIGHT, IMG_WIDTH, 3),
    weights='imagenet',
    include_top=False,
)

# Freeze the feature extractor: setting `trainable` on the model marks every
# layer it contains as non-trainable, so only the new head is updated.
backbone.trainable = False

model = build_model(hp, backbone)

model, history = fit(
    model,
    train_generator,
    val_generator,
    hp,
    class_weight_dict,
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m29084464/29084464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
Epoch 1/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 2s/step - accuracy: 0.2113 - loss: 2.2874 - val_accuracy: 0.1564 - val_loss: 1.7542 - learning_rate: 1.0000e-04
Epoch 2/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 1s/step - accuracy: 0.1954 - loss: 1.9575 - val_accuracy: 0.2347 - val_loss: 1.6474 - learning_rate: 1.0000e-04
Epoch 3/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 1s/step - accuracy: 0.2231 - loss: 1.7883 - val_accuracy: 0.2220 - val_loss: 1.6283 - learning_rate: 1.0000e-04
Epoch 4/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 1s/step - accuracy: 0.2106 - loss: 1.6919 - val_accuracy: 0.2389 - val_loss: 1.6190 - learning_rate: 1.0000e-04
Epoch 5/50
[1m

In [None]:
# Training vs. validation loss per epoch
plt.style.use('default')
_, loss_ax = plt.subplots()
loss_ax.plot(history['loss'], linestyle='--', alpha=0.3, label='Train loss')
loss_ax.plot(history['val_loss'], alpha=0.8, label='Val loss')
loss_ax.set_title('Categorical Crossentropy Loss')
loss_ax.legend(loc='upper left')
loss_ax.grid(alpha=0.3)

In [None]:
# Training vs. validation accuracy per epoch
plt.style.use('default')
_, acc_ax = plt.subplots()

# (history key, legend label, line styling) for each curve, drawn in order.
accuracy_series = (
    ('accuracy', 'Train accuracy', {'alpha': 0.3, 'linestyle': '--'}),
    ('val_accuracy', 'Val accuracy', {'alpha': 0.8}),
)
for history_key, legend_label, line_style in accuracy_series:
    acc_ax.plot(history[history_key], label=legend_label, **line_style)

acc_ax.set_title('Accuracy')
acc_ax.legend(loc='upper left')
acc_ax.grid(alpha=0.3)

In [None]:
# Predict on validation data and plot confusion matrix.
#
# Ground-truth labels and predictions are taken from the *same* batches. The
# generator may shuffle, so collecting labels in one pass and calling
# `model.predict(val_generator)` in a second pass would pair each prediction
# with the label of a different image and make the metrics meaningless.

# Class names ordered by their integer index, used for the matrix axes.
class_names = sorted(val_generator.class_indices, key=val_generator.class_indices.get)

y_true_batches, y_prob_batches = [], []
for batch_idx in range(len(val_generator)):  # exactly one pass over the data
    batch_images, batch_targets = val_generator[batch_idx]
    y_true_batches.append(batch_targets)
    y_prob_batches.append(model.predict_on_batch(batch_images))

y_true = np.concatenate(y_true_batches)
y_pred = np.concatenate(y_prob_batches)

if val_generator.class_mode == 'categorical':
    y_true = np.argmax(y_true, axis=1)  # Convert one-hot to class indices

y_pred_classes = np.argmax(y_pred, axis=1)  # Convert probabilities to class indices
print("Accuracy: ", accuracy_score(y_true, y_pred_classes))

# Pin the label set so the matrix always has one row/column per class and the
# tick labels stay aligned even if a class never occurs.
cm = confusion_matrix(y_true, y_pred_classes, labels=np.arange(len(class_names)))

plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()