# **Libraries**

In [3]:
import numpy as np 
import pandas as pd 
import tensorflow as tf
from tensorflow.keras.layers import Layer, Dense, Conv2D, Reshape, Embedding, Concatenate, Permute, Flatten, LayerNormalization
from keras import initializers
import os

2024-05-18 18:12:07.002597: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-18 18:12:07.002812: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-18 18:12:07.157804: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


# **Tool Box**

In [4]:
class TFViTPatchEmbeddings(Layer):
    """Cut an image into non-overlapping patches and project each to a vector.

    A ``Conv2D`` whose kernel and stride both equal the patch size emits one
    ``hidden_size``-wide vector per patch; the resulting 2-D grid of vectors
    is then flattened into a sequence of ``num_patches`` entries.

    Args:
        image_dim: ``(height, width, channels)`` of the input images.
        patch_size: ``(patch_height, patch_width)`` of a single patch.
        hidden_size: Number of convolution filters, i.e. the width of each
            patch embedding.

    Raises:
        AssertionError: If the image height or width is not divisible by the
            matching patch dimension.
    """

    def __init__(self,
                 image_dim=(256,256,3),
                 patch_size=(8,8),
                 hidden_size=24,
                ):

        super().__init__()

        # Fail before any sub-layer is built so a bad configuration never
        # leaves a half-initialised layer behind.
        assert image_dim[0]%patch_size[0]==0 and image_dim[1]%patch_size[1]==0, "Image dimension not divisible by the Patch dimension"

        self.image_dim=image_dim
        self.patch_size=patch_size
        self.hidden_size=hidden_size

        self.projection=Conv2D(
            filters=self.hidden_size,
            kernel_size=self.patch_size,
            # Stride by the full (height, width) patch size. Using only
            # patch_size[0] for both axes makes the output grid disagree with
            # num_patches whenever the patch is not square.
            strides=self.patch_size,
            padding="same",
            data_format="channels_last",
            use_bias=True,
            bias_initializer="zeros",
            name="projection"
        )

        # One patch per (row, column) cell of the patch grid.
        self.num_patches=(self.image_dim[0]//self.patch_size[0])*(self.image_dim[1]//self.patch_size[1])
        # Flatten the grid into a sequence; the last axis is inferred.
        self.rah=Reshape((self.num_patches,-1))

    def call(self,x):
        """Return the patch sequence for a channels-last image batch ``x``.

        The result has ``num_patches`` entries along axis 1; the last axis is
        whatever remains after the reshape (``hidden_size`` when the input
        matches ``image_dim``).
        """
        x=self.projection(x)
        x=self.rah(x)

        return x
        
        

In [5]:
from keras_nlp.layers import PositionEmbedding

In [6]:
class TFViTEmbeddings(Layer):
    """Pair a patch sequence with learned position embeddings.

    The position embeddings are *concatenated* with the input along the last
    axis (position embeddings first, input second) rather than added to it.

    Args:
        num_patches: Sequence length handed to ``PositionEmbedding``.
    """

    def __init__(self,num_patches):

        super().__init__()

        self.num_patches=num_patches
        self.embedding=PositionEmbedding(self.num_patches)
        # Created once here instead of inside call(): sub-layers belong to the
        # constructor so they are tracked and not re-instantiated per call.
        self.concat=Concatenate(axis=-1)

    def call(self,x):
        """Return ``[position_embedding, x]`` joined on the last axis.

        NOTE(review): presumably ``PositionEmbedding`` returns a tensor shaped
        like ``x``, which would double the last dimension - confirm against
        the keras_nlp documentation.
        """
        position_embedding=self.embedding(x)

        return self.concat([position_embedding,x])

In [7]:
class TFViTSelfAttention(Layer):
    """Multi-head scaled dot-product self-attention over a patch sequence.

    Args:
        num_patches: Length of the input sequence (used by the reshapes).
        hidden_dim: Total width of the query/key/value projections.
        num_heads: Number of attention heads; must divide ``hidden_dim``.

    Raises:
        AssertionError: If ``hidden_dim`` is not a multiple of ``num_heads``.
    """

    def __init__(self, num_patches=10, hidden_dim=24, num_heads=8):

        super().__init__()

        self.num_patches = num_patches
        self.hidden_dim = hidden_dim
        self.num_heads = num_heads

        assert self.hidden_dim % self.num_heads == 0, "Number Of Heads must be able to divide Hidden Dim"

        # Width handled by each individual head.
        self.head_dim = self.hidden_dim // self.num_heads

        # Linear projections producing queries, keys and values.
        self.query = Dense(self.hidden_dim, kernel_initializer=initializers.GlorotUniform(), name="query")
        self.key = Dense(self.hidden_dim, kernel_initializer=initializers.GlorotUniform(), name="key")
        self.value = Dense(self.hidden_dim, kernel_initializer=initializers.GlorotUniform(), name="value")

        # Scaling factor sqrt(head_dim) applied to the raw attention scores.
        self.dk = tf.math.sqrt(tf.cast(self.head_dim, tf.float32))

        # (patches, hidden) -> (patches, heads, head_dim) and back again.
        self.reshape = Reshape((self.num_patches, self.num_heads, self.head_dim))
        self.shape = Reshape((self.num_patches, self.hidden_dim))

        # Swap the patch and head axes; the same permutation undoes itself.
        self.permute = Permute((2, 1, 3))
        self.repermute = Permute((2, 1, 3))

    def call(self, x):
        """Apply self-attention to ``x`` and return a ``(num_patches, hidden_dim)`` sequence per sample."""
        # Project first, then move every projection to a heads-first layout.
        projected = [project(x) for project in (self.query, self.key, self.value)]
        queries, keys, values = [self.permute(self.reshape(tensor)) for tensor in projected]

        # Scaled similarity of every query with every key, per head.
        scores = tf.divide(tf.matmul(queries, keys, transpose_b=True), self.dk)
        weights = tf.nn.softmax(scores)

        # Weighted sum of values, then merge the heads back together.
        context = self.repermute(tf.matmul(weights, values))

        return self.shape(context)

In [8]:
class TFViTSelfOutput(Layer):
    """Single linear projection to ``hidden_size`` units.

    Args:
        hidden_size: Output width of the dense projection.
    """

    def __init__(self, hidden_size):
        super().__init__()

        self.hidden_size = hidden_size
        # Glorot-uniform initialised, no non-linearity.
        self.dense = Dense(
            self.hidden_size,
            kernel_initializer=initializers.GlorotUniform(),
            name="dense",
            activation="linear",
        )

    def call(self, x):
        """Return ``x`` passed through the linear projection."""
        return self.dense(x)

In [9]:
class TFViTAttention(Layer):
    """Self-attention followed by its linear output projection.

    Args:
        num_heads: Number of attention heads.
        num_patches: Length of the patch sequence.
        hidden_dim: Width of the attention output and of the projection.
    """

    def __init__(self,
                num_heads,
                num_patches,
                hidden_dim,
                ):
        super().__init__()

        self.num_heads=num_heads
        self.num_patches=num_patches
        self.hidden_dim=hidden_dim

        self.selfAttention=TFViTSelfAttention(self.num_patches,self.hidden_dim,self.num_heads)
        self.selfOutput=TFViTSelfOutput(self.hidden_dim)

    def call(self,x):
        """Return the projected multi-head attention output for ``x``."""
        selfAttention=self.selfAttention(x)

        # The output projection must consume the attention result so the
        # concatenated heads get mixed. Previously it was applied to the raw
        # input and merely summed with the attention output, leaving the
        # heads unprojected.
        outputs=self.selfOutput(selfAttention)

        return outputs

In [10]:
class TFViTIntermediate(Layer):
    """Feed-forward step: one dense layer with GELU activation.

    Args:
        intermediate_size: Number of units in the dense layer.
    """

    def __init__(self, intermediate_size):
        super().__init__()

        self.intermediate_size = intermediate_size
        self.dense = Dense(self.intermediate_size, activation="gelu")

    def call(self, x):
        """Return ``x`` after the GELU-activated dense layer."""
        return self.dense(x)

In [11]:
class TFViTOutput(Layer):
    """Linear projection with a skip connection around it.

    Args:
        hidden_size: Number of units in the dense layer. Because the input is
            added to the projection, it has to be shape-compatible with the
            input's last axis.
    """

    def __init__(self, hidden_size):
        super().__init__()

        self.hidden_size = hidden_size
        self.dense = Dense(self.hidden_size, activation="linear")

    def call(self, x):
        """Return ``x + dense(x)``."""
        return x + self.dense(x)

In [12]:
class TFViTLayer(Layer):
    """One encoder block: normalise, attend, normalise, feed forward.

    Args:
        num_heads: Number of attention heads.
        num_patches: Length of the patch sequence.
        hidden_dim: Width used for both the attention and the feed-forward
            dense layer.
    """

    def __init__(self, num_heads, num_patches, hidden_dim):
        super().__init__()

        self.num_heads = num_heads
        self.num_patches = num_patches
        self.hidden_dim = hidden_dim

        self.attention = TFViTAttention(self.num_heads, self.num_patches, self.hidden_dim)
        self.intermediate = TFViTIntermediate(self.hidden_dim)

        # Separate normalisation before each of the two sub-steps.
        self.ln1 = LayerNormalization()
        self.ln2 = LayerNormalization()

    def call(self, x):
        """Run the block on ``x`` and return a tensor of the same shape."""
        normalised = self.ln1(x)

        # The first skip connection adds the attention result to the
        # *normalised* input, not to the raw block input.
        attended = normalised + self.attention(normalised)

        transformed = self.intermediate(self.ln2(attended))

        # The second skip connection reaches back to the raw block input.
        return transformed + x

In [13]:
class TFViTEncoder(Layer):
    """Stack of ``TFViTLayer`` blocks applied one after another.

    Args:
        num_layer: Number of encoder blocks; must be at least 1.
        num_heads: Number of attention heads in every block.
        num_patches: Length of the patch sequence.
        hidden_dim: Width of every block.

    Raises:
        ValueError: If ``num_layer`` is smaller than 1.
    """

    def __init__(self,num_layer,num_heads,num_patches,hidden_dim):
        super().__init__()

        # With an empty stack call() would never bind `residue` and fail with
        # an opaque UnboundLocalError; reject the configuration up front.
        if num_layer<1:
            raise ValueError(f"num_layer must be at least 1, got {num_layer}")

        self.num_layer=num_layer
        self.num_heads=num_heads
        self.num_patches=num_patches
        self.hidden_dim=hidden_dim

        self.layers=[TFViTLayer(self.num_heads,self.num_patches,self.hidden_dim) for _ in range(num_layer)]

    def call(self,x):
        """Run ``x`` through every block and return the final representation.

        Only the input of the *last* block is added back to its output; the
        earlier blocks contribute through the sequential chain alone.
        """
        for layer in self.layers:

            # Remember what goes into this block; after the loop this holds
            # the input of the final block.
            residue=x
            x=layer(x)

        return residue+x

In [14]:
class TFViTMainLayer(Layer):
    """Full ViT backbone: patch embedding, position embedding, encoder, norm.

    Args:
        image_dim: ``(height, width, channels)`` of the input images.
        patch_dim: ``(patch_height, patch_width)``.
        num_heads: Number of attention heads in the encoder.
        num_patches: Length of the patch sequence.
        hidden_dim: Width of the patch embedding.
        num_layer: Number of encoder blocks.
        hidden_state: Width the encoder operates on.
            NOTE(review): the encoder input is the patch embedding
            concatenated with the output of ``TFViTEmbeddings``, so this
            presumably has to be 3 * ``hidden_dim`` - confirm.
    """

    def __init__(self, image_dim, patch_dim, num_heads, num_patches,
                 hidden_dim, num_layer, hidden_state):

        super().__init__()

        # Plain hyper-parameters.
        self.image_dim = image_dim
        self.patch_dim = patch_dim
        self.num_heads = num_heads
        self.num_patches = num_patches
        self.hidden_dim = hidden_dim
        self.num_layer = num_layer
        self.hidden_state = hidden_state

        # Sub-layers, in the order they are applied.
        self.patch_embedding = TFViTPatchEmbeddings(image_dim, patch_dim, hidden_dim)
        self.embedding = TFViTEmbeddings(self.num_patches)
        self.encoder = TFViTEncoder(
            self.num_layer,
            self.num_heads,
            self.num_patches,
            self.hidden_state,
        )
        self.ln = LayerNormalization()

    def call(self, x):
        """Encode an image batch ``x`` into a sequence of patch features."""
        patches = self.patch_embedding(x)
        positional = self.embedding(patches)

        # Patch embeddings are joined with the positional output on the
        # feature axis before entering the encoder.
        tokens = tf.concat((patches, positional), axis=-1)

        encoded = self.ln(self.encoder(tokens))

        return tf.nn.gelu(encoded)

In [15]:
import matplotlib.pyplot as plt

In [16]:
# All-ones tensor of shape (1, 150, 150, 3): a single 150x150, 3-channel sample,
# presumably kept around for smoke-testing the layers above.
k = tf.ones(shape=(1, 150, 150, 3))

In [17]:
# Keep the preprocessing object and the directory iterator under separate
# names instead of rebinding `Generator` to two different kinds of object.
# Pixel values are scaled from [0, 255] to [0, 1].
image_data_generator=tf.keras.preprocessing.image.ImageDataGenerator(rescale=1.0/255)

Generator=image_data_generator.flow_from_directory(
    "/kaggle/input/intel-image-classification/seg_train/seg_train",
    # The model is built for 150x150 inputs (225 patches of 10x10); the
    # default target_size of (256, 256) would not match that patch grid.
    target_size=(150,150),
    # Integer labels, as required by the sparse_categorical_crossentropy loss
    # the model is compiled with; the default "categorical" yields one-hot.
    class_mode="sparse",
)

Found 14034 images belonging to 6 classes.


In [30]:
# ViT backbone followed by a small fully connected classification head.
model = tf.keras.Sequential([
    TFViTMainLayer(
        image_dim=(150, 150, 3),
        patch_dim=(10, 10),
        num_heads=4,
        num_patches=225,   # (150 // 10) * (150 // 10)
        hidden_dim=32,
        num_layer=5,
        hidden_state=96,   # 3 * hidden_dim
    ),
    Flatten(),
    Dense(256, activation="relu"),
    Dense(256, activation="relu"),
    Dense(6, activation="softmax"),   # one output per class
])

In [31]:
# Adam optimiser with a loss that expects integer class labels.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
)