**Importing the necessary Libraries**

In [1]:
import tensorflow as tf
import os
import numpy as np
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
import torch
import torchvision

from torch import nn
from torchvision import transforms
# from helper_functions import set_seeds

2024-04-17 21:54:05.602329: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-17 21:54:05.602495: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-17 21:54:05.753538: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


**Defining the directories for our dataset**

In [2]:
# The three dataset splits live side by side under one Kaggle input folder;
# build each path from the shared prefix plus the split's folder name.
train_dir, validation_dir, test_dir = (
    "/kaggle/input/medicinal-plant-bijly-withbg/Medicinal Plant-Bijly-withBG/" + split_name
    for split_name in ("Train", "Validation", "Test")
)

**IMAGE PREPROCESSING FOR:**

***i). Training Data***

In [3]:
# Training split: batched (image, label) dataset read from `train_dir`.
medical_train = tf.keras.utils.image_dataset_from_directory(
    directory=train_dir,
    # --- labelling ---
    labels="inferred",
    label_mode="int",
    class_names=None,
    # --- image decoding / resizing ---
    color_mode="rgb",
    image_size=(256, 256),
    interpolation="bilinear",
    crop_to_aspect_ratio=False,
    data_format=None,
    # --- batching / ordering ---
    batch_size=32,
    shuffle=True,
    seed=None,  # unseeded: the shuffle order differs from run to run
    # --- no train/validation split is carved out here ---
    validation_split=None,
    subset=None,
    # --- file discovery ---
    follow_links=False,
)

Found 3500 files belonging to 10 classes.


***ii). Validation Data***

In [4]:
# Validation split: batched (image, label) dataset read from `validation_dir`.
# Stored under its own name so it does not overwrite the training dataset
# held in `medical_train`.
medical_validation = tf.keras.utils.image_dataset_from_directory(
    validation_dir,
    labels="inferred",
    label_mode="int",
    class_names=None,
    color_mode="rgb",
    batch_size=32,
    image_size=(256, 256),
    shuffle=True,
    seed=None,
    validation_split=None,
    subset=None,
    interpolation="bilinear",
    follow_links=False,
    crop_to_aspect_ratio=False,
    data_format=None,
)

Found 500 files belonging to 10 classes.


***iii). Testing Data***

In [5]:
# Test split: batched (image, label) dataset read from `test_dir`.
# Stored under its own name so it does not overwrite the training dataset
# held in `medical_train`.
medical_test = tf.keras.utils.image_dataset_from_directory(
    test_dir,
    labels="inferred",
    label_mode="int",
    class_names=None,
    color_mode="rgb",
    batch_size=32,
    image_size=(256, 256),
    shuffle=True,
    seed=None,
    validation_split=None,
    subset=None,
    interpolation="bilinear",
    follow_links=False,
    crop_to_aspect_ratio=False,
    data_format=None,
)

Found 1000 files belonging to 10 classes.


More Image preprocessing

In [6]:
from tensorflow.keras import layers

***Building the ViT Model***

In [7]:
class _ClassTokenAndPosition(layers.Layer):
    """Prepend a learnable class token and add learnable position embeddings.

    Input:  patch embeddings shaped (batch, num_patches, embed_dim).
    Output: token sequence shaped (batch, num_patches + 1, embed_dim); index 0
    along axis 1 is the class token.
    """

    def __init__(self, num_patches, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.num_patches = num_patches
        self.embed_dim = embed_dim

    def build(self, input_shape):
        # One shared class-token vector, tiled across the batch in call().
        self.class_token = self.add_weight(
            name="class_token",
            shape=(1, 1, self.embed_dim),
            initializer="zeros",
            trainable=True,
        )
        # One position vector per token, including the class-token slot.
        self.position_embedding = self.add_weight(
            name="position_embedding",
            shape=(1, self.num_patches + 1, self.embed_dim),
            initializer="random_normal",
            trainable=True,
        )
        super().build(input_shape)

    def call(self, patches):
        batch_size = tf.shape(patches)[0]
        class_token = tf.cast(tf.convert_to_tensor(self.class_token), patches.dtype)
        positions = tf.cast(tf.convert_to_tensor(self.position_embedding), patches.dtype)
        class_tokens = tf.repeat(class_token, repeats=batch_size, axis=0)
        # Concatenate along the sequence axis so the class token is token 0.
        tokens = tf.concat([class_tokens, patches], axis=1)
        return tokens + positions

    def get_config(self):
        config = super().get_config()
        config.update({"num_patches": self.num_patches, "embed_dim": self.embed_dim})
        return config


def create_vit_model(image_size, num_classes, patch_size=16, patch_dim=768,
                     num_layers=12, num_heads=12, dropout_rate=0.1):
    """Build a Vision-Transformer-style Keras classifier.

    The image is cut into non-overlapping `patch_size` x `patch_size` patches by
    a strided convolution, a learnable class token is prepended, position
    embeddings are added, and the sequence goes through `num_layers` blocks of
    self-attention + dense layer (each followed by a residual add and layer
    normalisation). The class token's final state feeds a softmax head.

    Previously the class token was broadcast-added to every patch (producing a
    rank-5 tensor) and `embeddings[:, 0]` picked the first image patch rather
    than a class token; the token is now concatenated at sequence index 0.

    Args:
        image_size: (height, width) of the RGB input images.
        num_classes: Number of output classes.
        patch_size: Side length of each square patch, in pixels.
        patch_dim: Embedding width used for every token.
        num_layers: Number of attention + dense blocks.
        num_heads: Attention heads per block; per-head width is
            `patch_dim // num_heads` (64 with the defaults).
        dropout_rate: Dropout applied after attention and after the dense layer.

    Returns:
        An uncompiled `tf.keras.Model` mapping (batch, height, width, 3) images
        to (batch, num_classes) class probabilities. No pixel rescaling is
        applied inside the model.
    """
    # Define input layer (RGB images).
    inputs = tf.keras.Input(shape=(image_size[0], image_size[1], 3))

    # The "valid" strided convolution yields floor(size / patch_size) patches per axis.
    num_patches = (image_size[0] // patch_size) * (image_size[1] // patch_size)

    # Patch embedding: one patch_dim-wide vector per patch, flattened to a sequence.
    patch_embedding = layers.Conv2D(
        patch_dim,
        kernel_size=patch_size,
        strides=patch_size,
        padding="valid",
        name="patch_embedding",
    )(inputs)
    patch_flattened = layers.Reshape((num_patches, patch_dim))(patch_embedding)

    # Prepend the class token and add position embeddings (num_patches + 1 tokens).
    embeddings = _ClassTokenAndPosition(
        num_patches, patch_dim, name="class_token_and_position"
    )(patch_flattened)

    # Layer normalization
    embeddings = layers.LayerNormalization(epsilon=1e-6)(embeddings)

    # Transformer blocks
    key_dim = patch_dim // num_heads
    for _ in range(num_layers):
        # Self-attention sub-layer with residual connection
        attention_output = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=key_dim
        )(embeddings, embeddings)
        attention_output = layers.Dropout(dropout_rate)(attention_output)
        attention_output = layers.LayerNormalization(epsilon=1e-6)(
            layers.Add()([embeddings, attention_output])
        )

        # Feed-forward sub-layer with residual connection
        ffn_output = layers.Dense(patch_dim, activation='gelu')(attention_output)
        ffn_output = layers.Dropout(dropout_rate)(ffn_output)
        embeddings = layers.LayerNormalization(epsilon=1e-6)(
            layers.Add()([attention_output, ffn_output])
        )

    # Token 0 is the class token; pass it to the classification head.
    class_token = embeddings[:, 0]
    outputs = layers.Dense(num_classes, activation='softmax')(class_token)

    # Create the model
    model = tf.keras.Model(inputs=inputs, outputs=outputs, name='vision_transformer')
    return model

# Model configuration: 10 target classes, 256x256 input images.
# Adjust both values to match your dataset.
num_classes = 10
image_size = (256, 256)

# Instantiate the ViT network from the configuration above.
vit_model = create_vit_model(image_size=image_size, num_classes=num_classes)

# Print the model summary.
vit_model.summary()

In [8]:
# # Compile the model
# vit_model.compile(optimizer='adam',
#                    loss='sparse_categorical_crossentropy',  # Use sparse categorical crossentropy for integer labels
#                    metrics=['accuracy'])

# # Define image dimensions and batch size
# # image_size = (256, 256)  # Adjusted to 256x256 as requested
# # batch_size = 32

# # Create an ImageDataGenerator for preprocessing
# train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
#     rescale=1./255  # Rescale pixel values to [0, 1]
# )

# # Load the training dataset using the ImageDataGenerator
# train_generator = train_datagen.flow_from_directory(
#     train_dir,
#     target_size=image_size,
#     batch_size=batch_size,
#     class_mode='categorical',  # NOTE: 'categorical' yields one-hot labels; sparse_categorical_crossentropy expects integer labels (class_mode='sparse')
#     shuffle=True  # shuffle the data randomly
# )


# # Load the training dataset using the ImageDataGenerator
# # train_generator = train_datagen.flow_from_directory(
# #     train_dir,
# #     target_size=image_size,
# #     batch_size=batch_size,
# #     class_mode='categorical',  # Change class mode to produce one-hot encoded labels
# #     shuffle=True  # shuffle the data randomly
# # )



# # Define the number of training steps per epoch
# steps_per_epoch = train_generator.samples // batch_size

# Train the model
# history = vit_model.fit(train_generator,
#                         epochs=10,  # Adjust the number of epochs as needed
#                         steps_per_epoch=steps_per_epoch)




In [9]:
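# # NOTE: vit_model is a tf.keras model; it has no .parameters() and cannot be
# # trained with torch.optim / loss.backward(). Use vit_model.compile() and
# # vit_model.fit() instead, or rebuild the model in PyTorch.
# # Define optimizer and loss function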
# optimizer = torch.optim.Adam(vit_model.parameters(), lr=0.001)  # Adjust learning rate as needed
# criterion = nn.CrossEntropyLoss()

# # Training loop
# for epoch in range(10):  # Adjust number of epochs
#     for images, labels in train_generator:
#         # Forward pass
#         outputs = vit_model(images)
#         loss = criterion(outputs, labels)

#         # Backward pass and optimize
#         optimizer.zero_grad()
#         loss.backward()
#         optimizer.step()

#         # Print training progress (optional)
#         print(f"Epoch: {epoch+1}/{10}, Loss: {loss.item():.4f}")

#         # Replace train_generator with your actual data loader
# # Ensure train_generator yields batches of images and labels
