# Vision Transformer

## Imports

In [97]:
# Imports

# System Imports
import os
import time

# Data Imports
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import scipy as sp
import cv2

import helper_functions as hf
import tensorflow as tf

from transformers import ViTFeatureExtractor, TFViTForImageClassification
import datasets

tf.keras.backend.set_image_data_format('channels_first')

## Reading in Data

In [41]:
# Setting paths

train_data_path = './data/archive/train'
test_data_path = './data/archive/test'

In [42]:
# Viewing categories

categories = os.listdir(train_data_path)
print(f"The categories are: {categories}")

The categories are: ['daisy', 'rose', 'tulip', 'dandelion', 'sunflower']


In [43]:
# Configurations

SEED = 0
IMAGE_SIZE = (224, 224)
BATCH_SIZE = 32
VALIDATION_SPLIT = 0.20
EPOCHS = 10

In [99]:
def create_image_folder_dataset(root_path):
  """creates `Dataset` from image folder structure"""

  # get class names by folders names
  _CLASS_NAMES= os.listdir(root_path)
  # defines `datasets` features`
  features=datasets.Features({
                      "img": datasets.Image(),
                      "label": datasets.features.ClassLabel(names=_CLASS_NAMES),
                  })
  # temp list holding datapoints for creation
  img_data_files=[]
  label_data_files=[]
  # load images into list for creation
  for img_class in os.listdir(root_path):
    for img in os.listdir(os.path.join(root_path,img_class)):
      path_=os.path.join(root_path,img_class,img)
      img_data_files.append(path_)
      label_data_files.append(img_class)
  # create dataset
  ds = datasets.Dataset.from_dict({"img":img_data_files,"label":label_data_files},features=features)
  return ds

In [100]:
df = create_image_folder_dataset("./data/archive/train")

In [101]:
df

Dataset({
    features: ['img', 'label'],
    num_rows: 2746
})

In [102]:
img_class_labels = df.features["label"].names
img_class_labels

['daisy', 'rose', 'tulip', 'dandelion', 'sunflower']

In [44]:
# Creating the training set

training_set = tf.keras.preprocessing.image_dataset_from_directory(
    train_data_path,
    validation_split=VALIDATION_SPLIT,
    subset="training",
    seed=SEED,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    label_mode="categorical",
    class_names=categories
)

Found 2746 files belonging to 5 classes.
Using 2197 files for training.


In [45]:
# Creating the validation set

validation_set = tf.keras.preprocessing.image_dataset_from_directory(
    train_data_path,
    validation_split=VALIDATION_SPLIT,
    subset="validation",
    seed=SEED,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    label_mode="categorical",
    class_names=categories
)

Found 2746 files belonging to 5 classes.
Using 549 files for validation.


## Modeling

In [46]:
model_path = 'google/vit-base-patch16-224'
model = TFViTForImageClassification.from_pretrained(model_path)
feature_extractor = ViTFeatureExtractor.from_pretrained(model_path)
# feature_extractor

All model checkpoint layers were used when initializing TFViTForImageClassification.

All the layers of TFViTForImageClassification were initialized from the model checkpoint at google/vit-base-patch16-224.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFViTForImageClassification for predictions without further training.


### Data Preprocessing

In [66]:
# Changing channel axis to the first dimension

train = training_set.map(lambda x, y: (tf.experimental.numpy.moveaxis(x, -1, 1), y)) 
valid = validation_set.map(lambda x, y: (tf.experimental.numpy.moveaxis(x, -1, 1), y)) 

In [48]:
# def preprocess_images(examples):

#     print(examples.shape)
#     images = examples['img']
#     images = [np.array(image, dtype=np.uint8) for image in images]
#     # images = [np.moveaxis(image, source=-1, destination=0) for image in images]
#     inputs = feature_extractor(images=images)
#     examples['pixel_values'] = inputs['pixel_values']

#     return examples.prefetch(buffer_size=AUTOTUNE)

In [93]:
def process_example(image, label):
    inputs = feature_extractor(image.numpy(), return_tensors='tf')
    print(inputs)
    inputs['labels'] = label
    return inputs

def transform(example_batch):
    # Take a list of PIL images and turn them to pixel values
    inputs = feature_extractor([x for x in example_batch[0]], return_tensors='tf')

    # Don't forget to include the labels!
    inputs['labels'] = example_batch[1]
    return inputs

In [94]:
for image, label in train.take(1):
    # print(tf.shape(feature_extractor(image[0].numpy(), return_tensors='tf')['pixel_values']))
    # print(tf.shape(image[0]), tf.shape(label[0]))
    # print(feature_extractor(image[0].numpy()))
    break

tf.Tensor([  3 224 224], shape=(3,), dtype=int32) tf.Tensor([5], shape=(1,), dtype=int32)


In [96]:
train = train.map(process_example) 
valid = valid.map(process_example) 

# results = []
# for image,label in temp_set.take(1):
#     results.append(process_example([image, label]))
#     # print(tf.shape(data[1]))
#     break

TypeError: DatasetV2.map() got an unexpected keyword argument 'batched'

In [17]:
# inputs
pixel_values = tf.keras.layers.Input(shape=(3,224,224), name='pixel_values', dtype='float32')

# model layer
vit = model.vit(pixel_values)[0]
classifier = tf.keras.layers.Dense(5, activation='softmax', name='outputs')(vit[:, 0, :])

# model
keras_model = tf.keras.Model(inputs=pixel_values, outputs=classifier)

In [19]:
tf.keras.backend.clear_session()

start_time = time.time()

## Compiling the model
keras_model.compile(
    optimizer='adam',
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy']
)

vit_history = keras_model.fit(
  training_set,
  validation_data=validation_set,
  epochs=EPOCHS
)

elasped_time = time.time() - start_time
# model_elapsed_time["vit"] = elasped_time

Epoch 1/10


ValueError: in user code:

    File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/keras/engine/training.py", line 1051, in train_function  *
        return step_function(self, iterator)
    File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/keras/engine/training.py", line 1040, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/keras/engine/training.py", line 1030, in run_step  **
        outputs = model.train_step(data)
    File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/keras/engine/training.py", line 889, in train_step
        y_pred = self(x, training=True)
    File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/keras/engine/input_spec.py", line 264, in assert_input_compatibility
        raise ValueError(f'Input {input_index} of layer "{layer_name}" is '

    ValueError: Input 0 of layer "model" is incompatible with the layer: expected shape=(None, 3, 224, 224), found shape=(None, 224, 224, 3)
