In [1]:
import os
import glob
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB0, MobileNet, ResNet50
from tensorflow.keras.layers import Concatenate, Conv2D, MaxPooling2D, Flatten, Dense, MultiHeadAttention, Add, Multiply, Softmax
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split

In [14]:
from tensorflow.keras.layers import GlobalAveragePooling2D, GlobalMaxPooling2D, Reshape, Input, Add

def rename_layers(model, prefix):
    for layer in model.layers:
        layer.name = prefix + layer.name

from tensorflow.keras.layers import Conv2D, Concatenate, Multiply, GlobalAveragePooling2D, GlobalMaxPooling2D, Reshape, Lambda

def spatial_attention(input_feature):
    kernel_size = 7  # You can experiment with different kernel sizes

    # Average Pooling along the channel axis
    avg_pool = Lambda(lambda x: tf.reduce_mean(x, axis=-1, keepdims=True))(input_feature)
    
    # Max Pooling along the channel axis
    max_pool = Lambda(lambda x: tf.reduce_max(x, axis=-1, keepdims=True))(input_feature)

    # Concatenate along the channel axis
    concat = Concatenate(axis=-1)([avg_pool, max_pool])
    
    # Apply a convolution layer to generate attention map
    attention = Conv2D(1, kernel_size=kernel_size, strides=1, padding='same', activation='sigmoid', 
                       kernel_initializer='he_normal', use_bias=False)(concat)
    
    # Multiply input feature by the attention map
    return Multiply()([input_feature, attention])
        
        
def channel_attention(input_feature, ratio=8):
    channel = input_feature.shape[-1]
    
    avg_pool = GlobalAveragePooling2D()(input_feature)
    avg_pool = Reshape((1, 1, channel))(avg_pool)
    avg_pool = Dense(channel // ratio, activation='relu', kernel_initializer='he_normal', use_bias=True)(avg_pool)
    avg_pool = Dense(channel, activation='sigmoid', kernel_initializer='he_normal', use_bias=True)(avg_pool)

    max_pool = GlobalMaxPooling2D()(input_feature)
    max_pool = Reshape((1, 1, channel))(max_pool)
    max_pool = Dense(channel // ratio, activation='relu', kernel_initializer='he_normal', use_bias=True)(max_pool)
    max_pool = Dense(channel, activation='sigmoid', kernel_initializer='he_normal', use_bias=True)(max_pool)

    attention = Add()([avg_pool, max_pool])
    return Multiply()([input_feature, attention])
        
    
def channel_spatial_attention(input_feature, ratio=8):
    # Apply channel attention
    x = channel_attention(input_feature, ratio=ratio)
    
    # Apply spatial attention
    x = spatial_attention(x)
    
    return x


In [3]:
# Preprocessing function
def createFrame(path, IMG_DIM):
    train_imgs = []
    labels = []
    directories = os.listdir(path)
    for i in range(len(directories)):
        ls = []
        temp = []
        curPath = os.path.join(path, directories[i], '*')
        ls = glob.glob(curPath)
        for img in ls:
            x = img_to_array(load_img(img, target_size=IMG_DIM))
            x = x / 255.0
            temp.append(x)
        
        print(f'Number of images in {directories[i]}: ', len(ls))
        train_imgs += temp
        label = [i] * len(ls)
        labels += label

    df = pd.DataFrame(list(zip(train_imgs, labels)))
    df = df.sample(frac=1).reset_index(drop=True)
    return df,directories

# Define image dimensions
IMG_WIDTH = 224
IMG_HEIGHT = 224
IMG_DIM = (IMG_WIDTH, IMG_HEIGHT, 3)

# Load data
path = '/kaggle/input/sipakmed/data'
df,num_classes = createFrame(path, IMG_DIM)

Number of images in im_Parabasal:  787
Number of images in im_Dyskeratotic:  813
Number of images in im_Metaplastic:  793
Number of images in im_Superficial-Intermediate:  831
Number of images in im_Koilocytotic:  825


In [19]:
dfTrain, dfTest = train_test_split(df, test_size=0.2, random_state=42, stratify=df[1])
train_imgs = np.array(list(dfTrain[0])) / 255.0
train_labels = np.array(dfTrain[1])
encoder = LabelEncoder()
encoder.fit(train_labels)
train_labels = to_categorical(encoder.transform(train_labels))

# Prepare test data
test_imgs = np.array(list(dfTest[0])) / 255.0
test_labels = np.array(dfTest[1])
test_labels = to_categorical(encoder.transform(test_labels))

In [29]:
from tensorflow.keras.layers import DepthwiseConv2D, SeparableConv2D, BatchNormalization, Activation
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow.keras.layers import Dropout
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau

def build_model():
    # Input
    inputs = tf.keras.Input(shape=(224, 224, 3), name="input_image")

    # Load pre-trained models
    efficientnetb0 = EfficientNetB0(include_top=False, input_tensor=inputs, weights='imagenet')
    mobilenet = MobileNet(include_top=False, input_tensor=inputs, weights='imagenet')
    resnet = ResNet50(include_top=False, input_tensor=inputs, weights='imagenet')

    # Rename layers
    rename_layers(efficientnetb0, 'efficientnetb0_')
    rename_layers(mobilenet, 'mobilenet_')
    rename_layers(resnet, 'resnet_')

    # Extract and Pool features
    efficientnetb0_output = GlobalAveragePooling2D()(efficientnetb0.output)
    mobilenet_output = GlobalAveragePooling2D()(mobilenet.output)
    resnet_output = GlobalAveragePooling2D()(resnet.output)

    # Concatenate features
    concatenated_features = Concatenate(axis=-1, name="concat_features")([efficientnetb0_output, mobilenet_output, resnet_output])
    concatenated_features = Reshape((1, 1, concatenated_features.shape[-1]))(concatenated_features)

    # Depthwise Separable Convolutions with Attention
    x = SeparableConv2D(1024, (3, 3), activation='relu', padding='same')(concatenated_features)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    x = channel_spatial_attention(x)

    x = SeparableConv2D(512, (3, 3), activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    x = channel_spatial_attention(x)

    x = SeparableConv2D(256, (3, 3), activation='relu', padding='same')(x)
    x = BatchNormalization()(x)

    # Flatten and Dense layers
    x = Flatten(name="flatten")(x)
    x = Dense(512, activation='relu', kernel_regularizer=l2(0.01))(x)
    x = Dropout(0.5)(x)
    outputs = Dense(5, activation='softmax', name="dense_output")(x)  # Assuming 5 classes for classification

    # Create and compile the model
    lr_schedule = ExponentialDecay(initial_learning_rate=1e-5, decay_steps=10000, decay_rate=0.9)
    model = Model(inputs, outputs)
    model.compile(optimizer=Adam(learning_rate=lr_schedule), loss='categorical_crossentropy', metrics=['accuracy'])

    return model


In [30]:
model = build_model()

  mobilenet = MobileNet(include_top=False, input_tensor=inputs, weights='imagenet')


In [31]:
import os
import time
import numpy as np
import tensorflow as tf
    
history = model.fit(train_imgs, train_labels, validation_split=0.2, epochs=50, batch_size=32)

Epoch 1/50
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m368s[0m 2s/step - accuracy: 0.2436 - loss: 4.9878 - val_accuracy: 0.2191 - val_loss: 4.9195
Epoch 2/50
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 352ms/step - accuracy: 0.5106 - loss: 4.8887 - val_accuracy: 0.2191 - val_loss: 4.8299
Epoch 3/50
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 353ms/step - accuracy: 0.6877 - loss: 4.7748 - val_accuracy: 0.2191 - val_loss: 4.7437
Epoch 4/50
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 351ms/step - accuracy: 0.7513 - loss: 4.6109 - val_accuracy: 0.2191 - val_loss: 4.6633
Epoch 5/50
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 352ms/step - accuracy: 0.7689 - loss: 4.3598 - val_accuracy: 0.1728 - val_loss: 4.5918
Epoch 6/50
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 352ms/step - accuracy: 0.7952 - loss: 3.9903 - val_accuracy: 0.2037 - val_loss: 4.5273
Epoch 7/50
[1m81/81[0m

In [32]:
pred = model.predict(test_imgs)

[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 561ms/step


In [33]:
res = np.argmax(pred,axis=-1)

In [34]:
actual = np.argmax(test_labels,axis=-1)

In [35]:
from sklearn.metrics import classification_report

print(classification_report(actual,res,target_names=num_classes,digits=5))

                             precision    recall  f1-score   support

               im_Parabasal    0.98742   1.00000   0.99367       157
            im_Dyskeratotic    0.97531   0.96933   0.97231       163
             im_Metaplastic    0.95597   0.95597   0.95597       159
im_Superficial-Intermediate    1.00000   0.99398   0.99698       166
            im_Koilocytotic    0.93939   0.93939   0.93939       165

                   accuracy                        0.97160       810
                  macro avg    0.97162   0.97173   0.97167       810
               weighted avg    0.97161   0.97160   0.97159       810

