In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import classification_report
import seaborn as sns

In [2]:
# Define the CNN model
def create_cnn_model(input_shape):
    model = models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu', padding = "same", input_shape=input_shape))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu', padding = "same"))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D((3, 3) , strides=2))
    model.add(layers.Conv2D(128, (3, 3), activation='relu', padding = "same"))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D((3, 3) , strides=2))
    model.add(layers.Conv2D(128, (3, 3), activation='relu', padding = "same"))
    model.add(layers.BatchNormalization())
    return model

In [3]:
# Define the Transformer model
class TransformerEncoder(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super(TransformerEncoder, self).__init__()
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tf.keras.Sequential([
             layers.Dense(ff_dim, activation="relu"), 
             layers.Dense(embed_dim)
            ]
        )
        self.layernorm1 = layers.LayerNormalization()
        self.layernorm2 = layers.LayerNormalization()
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training):
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

In [4]:
def create_transformer_model(input_shape, embed_dim, num_heads, ff_dim, num_transformer_blocks):
    inputs = layers.Input(shape=input_shape)
    x = (inputs)
    # x = layers.LayerNormalization()(x)
    for _ in range(num_transformer_blocks):
        x = TransformerEncoder(embed_dim, num_heads, ff_dim)(x)
    x = layers.Flatten()(x)
    x = layers.Dense(128, activation = "relu")(x)
    outputs = layers.Dense(2, activation='softmax')(x)
    return models.Model(inputs=inputs, outputs=outputs)

In [5]:
# Combine CNN and Transformer
def create_cnn_transformer_model(input_shape, embed_dim, num_heads, ff_dim, num_transformer_blocks):
    cnn_model = create_cnn_model(input_shape)
    cnn_output_shape = cnn_model.layers[-1].output_shape[1:]
    transformer_model = create_transformer_model(cnn_output_shape, embed_dim, num_heads, ff_dim, num_transformer_blocks)
    
    inputs = layers.Input(shape=input_shape)
    x = cnn_model(inputs)
    outputs = transformer_model(x)

    return models.Model(inputs=inputs, outputs=outputs)

In [16]:
datagen = ImageDataGenerator(shear_range=0.2,
                             zoom_range=0.2,
                             horizontal_flip=False,
                            validation_split = 0.2)
train_iterator = datagen.flow_from_directory("D:\Transformers\Datasets",target_size=(50, 50),batch_size = 32, subset = "training")
x_train, y_train = train_iterator.next()
test_iterator = datagen.flow_from_directory("D:\Transformers\Datasets",target_size=(50, 50),batch_size = 32, subset = "validation")
x_test, y_test = test_iterator.next()
x_train = x_train/255.0
x_test = x_test/225.0
input_shape = x_train.shape[1:]
print(input_shape)

Found 221357 images belonging to 2 classes.
Found 55338 images belonging to 2 classes.
(50, 50, 3)


In [17]:
# Create and compile the model
model = create_cnn_transformer_model(input_shape, embed_dim=128, num_heads=2, ff_dim=(128*2), num_transformer_blocks=5)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [18]:
# Train the model
model.fit(x_train, y_train, epochs=10, batch_size = 75)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x23a1179e510>

In [19]:
model.evaluate(x_test, y_test)



[0.9662061929702759, 0.8125]

In [20]:
y_pred = model.predict(x_test)



In [21]:
y_test = np.reshape(y_test, newshape = 64)
y_pred = np.reshape(y_pred, newshape = 64)
y_pred = np.where(y_pred >= 0.5, 1, 0)
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

         0.0       0.81      0.81      0.81        32
         1.0       0.81      0.81      0.81        32

    accuracy                           0.81        64
   macro avg       0.81      0.81      0.81        64
weighted avg       0.81      0.81      0.81        64

