In [2]:
import numpy as np
import pathlib
import tensorflow as tf
!pip install tensorflow-addons
import tensorflow_addons as tfa
from tensorflow import keras

Collecting tensorflow-addons
  Downloading tensorflow_addons-0.23.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (611 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/611.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m153.6/611.8 kB[0m [31m4.6 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━[0m [32m563.2/611.8 kB[0m [31m8.0 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m611.8/611.8 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
Collecting typeguard<3.0.0,>=2.7 (from tensorflow-addons)
  Downloading typeguard-2.13.3-py3-none-any.whl (17 kB)
Installing collected packages: typeguard, tensorflow-addons
Successfully installed tensorflow-addons-0.23.0 typeguard-2.13.3



TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



In [3]:
INPUT_SHAPE = (256,256,3)
IMAGE_SIZE = 72
PATCH_SIZE = 9
NUM_PATCHES = (IMAGE_SIZE // PATCH_SIZE) ** 2

TRANSFORMER_LAYERS = 4
NUM_HEADS = 8
PROJECTION_DIM = 64
TRANSFORMER_UNITS = [PROJECTION_DIM * 2, PROJECTION_DIM,]
MLP_HEAD_UNITS = [256, 128]
LAYER_NORM_EPS = 1e-6

LEARNING_RATE = 0.001
WEIGHT_DECAY = 0.0001

In [4]:
class Patches(keras.layers.Layer):
    def __init__(self, patch_size=PATCH_SIZE):
        super(Patches, self).__init__()
        self.patch_size = patch_size

    def call(self, images):
        batch_size = tf.shape(images)[0]
        patches = tf.image.extract_patches(
            images=images,
            sizes=[1, self.patch_size, self.patch_size, 1],
            strides=[1, self.patch_size, self.patch_size, 1],
            rates=[1, 1, 1, 1],
            padding="VALID",
        )
        patch_dims = patches.shape[-1]
        patches = tf.reshape(patches, [batch_size, -1, patch_dims])
        return patches

In [5]:
class PatchEncoder(keras.layers.Layer):
    def __init__(self, num_patches=NUM_PATCHES, projection_dim=PROJECTION_DIM):
        super(PatchEncoder, self).__init__()
        self.num_patches = num_patches
        self.projection = keras.layers.Dense(units=projection_dim)
        self.position_embedding = keras.layers.Embedding(
            input_dim=num_patches, output_dim=projection_dim
        )

    def call(self, patch):
        positions = tf.range(start=0, limit=self.num_patches, delta=1)
        encoded = self.projection(patch) + self.position_embedding(positions)
        return encoded

In [6]:
def mlp(x, hidden_units, dropout_rate):
    for units in hidden_units:
        x = keras.layers.Dense(units, activation=tf.nn.gelu)(x)
        x = keras.layers.Dropout(dropout_rate)(x)
    return x

In [8]:
def create_classifier_model(NUM_CLASSES):
    inputs = keras.layers.Input(shape=INPUT_SHAPE)
    #augmented = data_augmentation(inputs)
    augmented = inputs
    patches = Patches()(augmented)
    encoded_patches = PatchEncoder()(patches)

    for _ in range(TRANSFORMER_LAYERS):
        x1 = keras.layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
        attention_output = keras.layers.MultiHeadAttention(
                num_heads=NUM_HEADS, key_dim=PROJECTION_DIM, dropout=0.1
            )(x1, x1)
        x2 = keras.layers.Add()([attention_output, encoded_patches])
        x3 = keras.layers.LayerNormalization(epsilon=1e-6)(x2)
        x3 = mlp(x3, hidden_units=TRANSFORMER_UNITS, dropout_rate=0.1)
        encoded_patches = keras.layers.Add()([x3, x2])

    representation = keras.layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
    representation = keras.layers.Flatten()(representation)
    representation = keras.layers.Dropout(0.5)(representation)
    features = mlp(representation, hidden_units=MLP_HEAD_UNITS, dropout_rate=0.5)
    logits = keras.layers.Dense(NUM_CLASSES)(features)
    model = keras.Model(inputs=inputs, outputs=logits)
    model.summary()
    return model

In [10]:
ViTmodel = create_classifier_model(50)

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 256, 256, 3)]        0         []                            
                                                                                                  
 patches (Patches)           (None, None, 243)            0         ['input_1[0][0]']             
                                                                                                  
 patch_encoder (PatchEncode  (None, 64, 64)               19712     ['patches[0][0]']             
 r)                                                                                               
                                                                                                  
 layer_normalization (Layer  (None, 64, 64)               128       ['patch_encoder[0][0]']   

In [18]:
img_input = tf.keras.layers.Input(shape=(224,224,1), name="image_input")
cnn_layer = tf.keras.layers.Conv2D(16, 3, padding='same', activation='relu')(img_input)
cnn_layer = tf.keras.layers.MaxPooling2D()(cnn_layer)
cnn_layer = tf.keras.layers.Conv2D(32, 3, padding='same', activation='relu')(cnn_layer)
cnn_layer = tf.keras.layers.MaxPooling2D()(cnn_layer)
cnn_layer = tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu')(cnn_layer)
cnn_layer = tf.keras.layers.MaxPooling2D()(cnn_layer)
cnn_layer = tf.keras.layers.Dropout(0.1)(cnn_layer)
cnn_layer = tf.keras.layers.Flatten()(cnn_layer)
network_output = tf.keras.layers.Dense(50, activation='softmax')(cnn_layer)
cnn_classifier_model = tf.keras.Model(inputs=img_input, outputs=network_output)

In [19]:
print(cnn_classifier_model.summary())

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 image_input (InputLayer)    [(None, 224, 224, 1)]     0         
                                                                 
 conv2d_7 (Conv2D)           (None, 224, 224, 16)      160       
                                                                 
 max_pooling2d_6 (MaxPoolin  (None, 112, 112, 16)      0         
 g2D)                                                            
                                                                 
 conv2d_8 (Conv2D)           (None, 112, 112, 32)      4640      
                                                                 
 max_pooling2d_7 (MaxPoolin  (None, 56, 56, 32)        0         
 g2D)                                                            
                                                                 
 conv2d_9 (Conv2D)           (None, 56, 56, 64)        1849