In [None]:
import os
import tensorflow as tf
from tensorflow.keras import layers, Model
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Data Preprocessing
def preprocess_image(image, label=None):
    """Resize an image to 144x144 and divide its values by 255.

    Args:
        image: Image tensor accepted by ``tf.image.resize``.
        label: Optional label that is passed through unchanged.

    Returns:
        A ``(image, label)`` tuple with the processed image and the
        untouched label.
    """
    resized = tf.image.resize(image, (144, 144))
    as_float = tf.image.convert_image_dtype(resized, tf.float32)
    return as_float / 255.0, label

# Input pipeline settings shared by the training and test iterators.
batch_size = 32
train_data_dir = '/kaggle/input/shitpy/dataset/train'
test_data_dir = '/kaggle/input/shitpy/dataset/test'

# Training split: pixel values are rescaled by 1/255 and the images are
# randomly sheared, zoomed and horizontally flipped.
train_datagen = ImageDataGenerator(
    horizontal_flip=True,
    zoom_range=0.2,
    shear_range=0.2,
    rescale=1. / 255,
)
train_dataset = train_datagen.flow_from_directory(
    directory=train_data_dir,
    class_mode='categorical',
    batch_size=batch_size,
    target_size=(144, 144),
)

# Test split: only the 1/255 rescaling is applied.
test_datagen = ImageDataGenerator(rescale=1. / 255)
unlabeled_test_dataset = test_datagen.flow_from_directory(
    directory=test_data_dir,
    class_mode=None,   # no labels are requested for the test images
    shuffle=False,     # shuffling disabled for the test iterator
    batch_size=batch_size,
    target_size=(144, 144),
)

# Model Definition
class PatchEmbedding(layers.Layer):
    """Split an image into square patches and linearly embed each patch.

    Args:
        patch_size: Side length, in pixels, of every square patch.
        emb_size: Width of the embedding produced for each patch.
    """

    def __init__(self, patch_size=8, emb_size=128):
        super(PatchEmbedding, self).__init__()
        self.patch_size = patch_size
        # The Reshape collapses the grid of patches into one token axis; the
        # Dense layer then projects every flattened 3-channel patch.
        self.projection = tf.keras.Sequential([
            layers.Reshape((-1, patch_size * patch_size * 3)),
            layers.Dense(emb_size)
        ])

    def call(self, x):
        """Embed a batch of images.

        Args:
            x: Image batch; assumed to be channels-last with 3 channels,
                matching the hard-coded ``* 3`` in the projection.

        Returns:
            A tensor with one ``emb_size``-wide embedding per patch.
        """
        size = self.patch_size
        # Reshaping the raw image alone would cut each row into runs of
        # consecutive pixels rather than 2-D patches, so gather real
        # non-overlapping size x size patches first.
        patches = tf.image.extract_patches(
            images=x,
            sizes=[1, size, size, 1],
            strides=[1, size, size, 1],
            rates=[1, 1, 1, 1],
            padding='VALID',
        )
        return self.projection(patches)

class Attention(layers.Layer):
    """Self-attention wrapper around ``layers.MultiHeadAttention``.

    Args:
        dim: Value forwarded to ``MultiHeadAttention`` as ``key_dim``.
        n_heads: Number of attention heads.
        dropout: Dropout rate forwarded to ``MultiHeadAttention``.
    """

    def __init__(self, dim, n_heads, dropout):
        super().__init__()
        self.n_heads = n_heads
        self.att = layers.MultiHeadAttention(
            num_heads=n_heads,
            key_dim=dim,
            dropout=dropout,
        )

    def call(self, x):
        """Attend ``x`` to itself (the same tensor is query and value)."""
        attended = self.att(x, x)
        return attended

class PreNorm(layers.Layer):
    """Apply layer normalization before delegating to a wrapped callable.

    Args:
        dim: Accepted for signature compatibility; not used by this layer.
        fn: Callable (typically a layer) applied to the normalized input.
    """

    def __init__(self, dim, fn):
        super().__init__()
        self.norm = layers.LayerNormalization(epsilon=1e-6)
        self.fn = fn

    def call(self, x, **kwargs):
        """Normalize ``x`` and pass it, plus any keyword arguments, to ``fn``."""
        normalized = self.norm(x)
        return self.fn(normalized, **kwargs)

class FeedForward(layers.Layer):
    """Two-layer MLP with GELU activation and dropout after each Dense layer.

    Args:
        dim: Output width of the second Dense layer.
        hidden_dim: Width of the first (GELU-activated) Dense layer.
        dropout: Rate used by both Dropout layers.
    """

    def __init__(self, dim, hidden_dim, dropout=0.):
        super().__init__()
        stages = [
            layers.Dense(hidden_dim, activation='gelu'),
            layers.Dropout(dropout),
            layers.Dense(dim),
            layers.Dropout(dropout),
        ]
        self.ffn = tf.keras.Sequential(stages)

    def call(self, x):
        """Run ``x`` through the MLP stack."""
        transformed = self.ffn(x)
        return transformed

class ResidualAdd(layers.Layer):
    """Add a skip connection around a wrapped callable.

    Args:
        fn: Callable (typically a layer) whose output is added to its input.
    """

    def __init__(self, fn):
        super().__init__()
        self.fn = fn

    def call(self, x, **kwargs):
        """Return ``fn(x, **kwargs) + x``."""
        shortcut = x
        transformed = self.fn(x, **kwargs)
        return transformed + shortcut

class ViT(Model):
    """Vision Transformer classifier that outputs one logit per class.

    Args:
        img_size: Side length of the input images; used only to compute the
            number of patches, so square inputs are assumed.
        patch_size: Side length of each image patch.
        emb_dim: Width of the token embeddings.
        n_layers: Number of transformer blocks.
        out_dim: Number of output logits.
        dropout: Dropout rate used in the attention and feed-forward layers.
        heads: Number of attention heads per block.
    """

    def __init__(self, img_size=144, patch_size=4, emb_dim=32, n_layers=6, out_dim=5, dropout=0.1, heads=2):
        super(ViT, self).__init__()

        self.patch_embedding = PatchEmbedding(patch_size=patch_size, emb_size=emb_dim)
        num_patches = (img_size // patch_size) ** 2

        # Created through add_weight rather than as bare tf.Variable
        # attributes so that Keras registers them as trainable weights
        # (newer Keras releases do not auto-track raw tf.Variable attributes).
        # A unit normal initializer keeps the previous tf.random.normal
        # initialization.
        self.pos_embedding = self.add_weight(
            name='pos_embedding',
            shape=(1, num_patches + 1, emb_dim),
            initializer=tf.keras.initializers.RandomNormal(mean=0.0, stddev=1.0),
            trainable=True,
        )
        self.cls_token = self.add_weight(
            name='cls_token',
            shape=(1, 1, emb_dim),
            initializer=tf.keras.initializers.RandomNormal(mean=0.0, stddev=1.0),
            trainable=True,
        )

        # Each block is pre-norm attention followed by a pre-norm MLP, both
        # wrapped in residual connections.
        self.transformer_blocks = []
        for _ in range(n_layers):
            transformer_block = tf.keras.Sequential([
                ResidualAdd(PreNorm(emb_dim, Attention(emb_dim, n_heads=heads, dropout=dropout))),
                ResidualAdd(PreNorm(emb_dim, FeedForward(emb_dim, emb_dim, dropout=dropout)))
            ])
            self.transformer_blocks.append(transformer_block)

        self.head = tf.keras.Sequential([
            layers.LayerNormalization(epsilon=1e-6),
            layers.Dense(out_dim)
        ])

    def call(self, img, training=None):
        """Compute class logits for a batch of images.

        Args:
            img: Batch of images fed to the patch embedding.
            training: Optional flag forwarded to the transformer blocks and
                the head; ``None`` leaves the decision to Keras.

        Returns:
            Tensor of ``out_dim`` logits per image, taken from the class
            token position.
        """
        x = self.patch_embedding(img)
        n = x.shape[1]
        # The static batch dimension is None while Keras traces the model
        # (e.g. inside predict), so read the batch size dynamically.
        batch = tf.shape(x)[0]

        cls_tokens = tf.repeat(self.cls_token, repeats=batch, axis=0)
        x = tf.concat([cls_tokens, x], axis=1)
        x += self.pos_embedding[:, :(n + 1)]

        for block in self.transformer_blocks:
            x = block(x, training=training)

        # Classify from the class token, which sits at sequence index 0.
        return self.head(x[:, 0, :], training=training)

model = ViT()

# Loss function and optimizer
# The loss is configured with from_logits=True, i.e. the model output is
# treated as unnormalized scores.
loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

# Training loop
num_epochs = 10
# Iterators returned by flow_from_directory keep yielding batches forever, so
# an epoch has to be bounded explicitly by the number of batches they report.
steps_per_epoch = len(train_dataset)

for epoch in range(num_epochs):
    epoch_losses = []
    for step in range(steps_per_epoch):
        inputs, labels = next(train_dataset)
        with tf.GradientTape() as tape:
            # training=True requests training behaviour (dropout) from the
            # model while fitting.
            outputs = model(inputs, training=True)
            loss = loss_fn(labels, outputs)

        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        epoch_losses.append(loss.numpy())

    # Report the mean batch loss every fifth epoch (epochs 0, 5, ...).
    if epoch % 5 == 0:
        print(f">>> Epoch {epoch} train loss: ", np.mean(epoch_losses))

# Prediction on unlabeled test data
predictions = model.predict(unlabeled_test_dataset, verbose=1)

# The 'predictions' variable now contains the model predictions for the unlabeled test data
print("Predictions shape:", predictions.shape)

In [None]:
import pandas as pd

# Write the model outputs to CSV. The DataFrame is built from the
# `predictions` array itself (not the string 'predictions'), giving one row
# per prediction.
predd = pd.DataFrame(predictions)
predd.to_csv('pred_transf.csv', index=False)