In [1]:
import zipfile, os
import numpy as np
import tensorflow as tf
from PIL import Image
from datasets import load_dataset
from tensorflow.keras import layers
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from transformers import AutoImageProcessor
from transformers import DefaultDataCollator
from transformers import create_optimizer
from transformers import TFAutoModelForImageClassification

In [2]:
gpu = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(gpu[0], True) #limits gpu memory

In [2]:
!gdown 14m2XW31x_UWAqUeoM0SNVsH3kwyBIAVy

zip_ref = zipfile.ZipFile('data.zip', 'r')
zip_ref.extractall()
zip_ref.close()

Downloading...
From (original): https://drive.google.com/uc?id=14m2XW31x_UWAqUeoM0SNVsH3kwyBIAVy
From (redirected): https://drive.google.com/uc?id=14m2XW31x_UWAqUeoM0SNVsH3kwyBIAVy&confirm=t&uuid=743331cf-719b-4f07-bc73-d29eb1164af5
To: /home/remunata/dev/bangkit/ML-Progress/Vision Transformer (ViT)/data.zip
100%|███████████████████████████████████████| 2.48G/2.48G [47:36<00:00, 870kB/s]


In [3]:
dataset = load_dataset("imagefolder", data_dir='data')

labels = dataset['train'].features['label'].names
label2id, id2label = dict(), dict()
for i, label in enumerate(labels):
    label2id[label] = str(i)
    id2label[str(i)] = label

Resolving data files:   0%|          | 0/6650 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/1869 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/1870 [00:00<?, ?it/s]

In [4]:
checkpoint = "google/vit-base-patch16-224-in21k"
image_processor = AutoImageProcessor.from_pretrained(checkpoint)



In [5]:
size = (image_processor.size["height"], image_processor.size["width"])

train_data_augmentation = tf.keras.Sequential(
    [
        layers.RandomCrop(size[0], size[1]),
        layers.Rescaling(scale=1.0 / 255.0),
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(factor=0.02),
        layers.RandomZoom(height_factor=0.2, width_factor=0.2),
    ],
    name="train_data_augmentation",
)

val_data_augmentation = tf.keras.Sequential(
    [
        layers.CenterCrop(size[0], size[1]),
        layers.Rescaling(scale=1.0 / 255.0),
    ],
    name="val_data_augmentation",
)

In [6]:
def convert_to_tf_tensor(image: Image):
    np_image = np.array(image)
    tf_image = tf.convert_to_tensor(np_image)
    return tf.expand_dims(tf_image, 0)


def preprocess_train(example_batch):
    images = [
        train_data_augmentation(convert_to_tf_tensor(image.convert("RGB"))) for image in example_batch["image"]
    ]
    example_batch["pixel_values"] = [tf.transpose(tf.squeeze(image)) for image in images]
    return example_batch


def preprocess_val(example_batch):
    images = [
        val_data_augmentation(convert_to_tf_tensor(image.convert("RGB"))) for image in example_batch["image"]
    ]
    example_batch["pixel_values"] = [tf.transpose(tf.squeeze(image)) for image in images]
    return example_batch

In [7]:
dataset["train"].set_transform(preprocess_train)
dataset["validation"].set_transform(preprocess_val)
dataset["test"].set_transform(preprocess_val)

In [8]:
batch_size = 8
num_epochs = 30
num_train_steps = len(dataset["train"]) * num_epochs
learning_rate = 3e-5
weight_decay_rate = 0.01

In [9]:
data_collator = DefaultDataCollator(return_tensors="tf")

tf_train_dataset = dataset["train"].to_tf_dataset(
    columns="pixel_values", label_cols="label", shuffle=True, batch_size=batch_size, collate_fn=data_collator
)

tf_eval_dataset = dataset["validation"].to_tf_dataset(
    columns="pixel_values", label_cols="label", shuffle=True, batch_size=batch_size, collate_fn=data_collator
)

tf_test_dataset = dataset["test"].to_tf_dataset(
    columns="pixel_values", label_cols="label", shuffle=True, batch_size=batch_size, collate_fn=data_collator
)

In [10]:
model = TFAutoModelForImageClassification.from_pretrained(
    checkpoint,
    id2label=id2label,
    label2id=label2id,
)

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFViTForImageClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing TFViTForImageClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFViTForImageClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFViTForImageClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [11]:
optimizer, lr_schedule = create_optimizer(
    init_lr=learning_rate,
    num_train_steps=num_train_steps,
    weight_decay_rate=weight_decay_rate,
    num_warmup_steps=0,
)

loss = SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

In [12]:
model.fit(tf_train_dataset, validation_data=tf_eval_dataset, epochs=num_epochs)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.src.callbacks.History at 0x7fe8b6965850>

In [13]:
loss, accuracy = model.evaluate(tf_test_dataset)

print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy * 100:.2f}%")

Test Loss: 0.4102
Test Accuracy: 90.48%


In [14]:
version = '1'
export_path = os.path.join("saved_model", version)

model.save(export_path, save_format='tf')

INFO:tensorflow:Assets written to: saved_model/1/assets


INFO:tensorflow:Assets written to: saved_model/1/assets
