### Import Libraries

In [63]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
import os
import shutil

!pip install tensorflow
!pip install tensorflow-addons
!pip install huggingface-hub
# Import ViT libraries
import tensorflow as tf
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.applications import imagenet_utils
from huggingface_hub import from_pretrained_keras



#### Testing Environment

In [64]:
import tensorflow as tf
print(tf.__version__)
print("GPU", "available" if tf.config.list_physical_devices("GPU") else "not available")

2.17.0
GPU available


### Prepared for Dataset

In [6]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


###### We start with training model

In [65]:
train_folder = 'drive/MyDrive/Dataset/Fruit/train'

data = []
for label in os.listdir(train_folder):
    class_folder = os.path.join(train_folder, label)
    if os.path.isdir(class_folder):
        for image_file in os.listdir(class_folder):
          image_path = os.path.join(class_folder, image_file)
          data.append([image_path, label])

df = pd.DataFrame(data, columns=['image_path', 'label'])
df.to_csv('train_dataset.csv', index = False)
# from google.colab import files
# files.download('train_dataset.csv')
df.tail(100)

Unnamed: 0,image_path,label
1659,drive/MyDrive/Dataset/Fruit/train/Apricot/Apri...,Apricot
1660,drive/MyDrive/Dataset/Fruit/train/Apricot/Apri...,Apricot
1661,drive/MyDrive/Dataset/Fruit/train/Apricot/Apri...,Apricot
1662,drive/MyDrive/Dataset/Fruit/train/Apricot/Apri...,Apricot
1663,drive/MyDrive/Dataset/Fruit/train/Apricot/Apri...,Apricot
...,...,...
1754,drive/MyDrive/Dataset/Fruit/train/Apricot/Apri...,Apricot
1755,drive/MyDrive/Dataset/Fruit/train/Apricot/Apri...,Apricot
1756,drive/MyDrive/Dataset/Fruit/train/Apricot/Apri...,Apricot
1757,drive/MyDrive/Dataset/Fruit/train/Apricot/Apri...,Apricot


##### Now with testing model

In [66]:
test_folder = 'drive/MyDrive/Dataset/Fruit/test'

test_data = []

for image_files in os.listdir(test_folder):
  image_path = os.path.join(test_folder, image_files)
  test_data.append([image_path])

test_df = pd.DataFrame(test_data, columns=['image_path'])
test_df.to_csv('test_dataset.csv', index = False)
# from google.colab import files
# files.download('test_dataset.csv')
test_df.head(5)

Unnamed: 0,image_path
0,drive/MyDrive/Dataset/Fruit/test/4748.jpg
1,drive/MyDrive/Dataset/Fruit/test/5274.jpg
2,drive/MyDrive/Dataset/Fruit/test/4663.jpg
3,drive/MyDrive/Dataset/Fruit/test/5236.jpg
4,drive/MyDrive/Dataset/Fruit/test/4791.jpg


### Using ViT model to train the dataset

#### Loading and Preprocessing the Dataset

In [67]:
train_dir = 'drive/MyDrive/Dataset/Fruit/train'
test_dir = 'drive/MyDrive/Dataset/Fruit/test'

# Loading the dataset
train_dataset = image_dataset_from_directory(train_dir,
                                             shuffle=True,
                                             batch_size=32,
                                             image_size=(224, 224))
test_dataset = image_dataset_from_directory(test_dir,
                                            shuffle=False,
                                            batch_size=32,
                                            label_mode=None,
                                            image_size=(224, 224))

Found 1759 files belonging to 33 classes.
Found 5641 files.


#### Data Augmentation and Preprocessing

In [68]:
data_augmentation = tf.keras.Sequential([
  layers.RandomFlip('horizontal'),
  layers.RandomRotation(0.2),
  layers.RandomZoom(0.2)
])

def preprocess(image,label=None) :
  image = tf.cast(image, tf.float32) / 255.0
  if label is None:
    return image # Return only the image if label is None
  else:
    return image, label

train_dataset = train_dataset.map(preprocess)
test_dataset = test_dataset.map(preprocess)

#### Load Pre-trained ViT Model

In [71]:
# Login hugginface account
!pip install huggingface_hub
from huggingface_hub import notebook_login

# Log in to your Hugging Face account
notebook_login()




VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [107]:
# Load a pre-trained Vision Transformer (ViT) model from Hugging Face
vit_model = from_pretrained_keras('keras-/vit-base-patch16-224')

Fetching 8 files:   0%|          | 0/8 [00:00<?, ?it/s]

OSError: SavedModel file does not exist at: /root/.cache/huggingface/hub/models--google--vit-base-patch16-224/snapshots/3f49326eb077187dfe1c2a2bb15fbd74e6ab91e3/{saved_model.pbtxt|saved_model.pb}

#### Sequential

In [106]:
# Freeze the base ViT model
vit_model.trainable = False
num_classes = 5

# Build the complete model
model = Sequential([
    layers.InputLayer(input_shape=(224, 224, 3)),
    vit_model,
    layers.GlobalAveragePooling2D(),
    data_augmentation,
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(num_classes, activation='softmax')  # num_classes is the number of fruit categories
])

# Compile the model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
              loss='sparse_categorical_crossentropy',  # Change this if your labels are one-hot encoded
              metrics=['accuracy'])

model.summary()


ValueError: Only instances of `keras.Layer` can be added to a Sequential model. Received: <transformers.models.vit.modeling_tf_vit.TFViTForImageClassification object at 0x7e55b4f3f280> (of type <class 'transformers.models.vit.modeling_tf_vit.TFViTForImageClassification'>)

In [103]:
history = model.fit(train_dataset, epochs=10, validation_data=test_dataset)

Epoch 1/10


ValueError: Exception encountered when calling layer 'patch_embeddings' (type TFViTPatchEmbeddings).

Input 0 of layer "projection" is incompatible with the layer: expected axis -1 of input shape to have value 3, but received input with shape (None, 224, 3, 224)

Call arguments received by layer 'patch_embeddings' (type TFViTPatchEmbeddings):
  • pixel_values=tf.Tensor(shape=(None, 224, 224, 3), dtype=float32)
  • interpolate_pos_encoding=None
  • training=False

In [None]:
# Evaluate the model on test data
test_loss, test_accuracy = model.evaluate(test_dataset)
print(f"Test accuracy: {test_accuracy}")