<a href="https://colab.research.google.com/github/Sauleh-repo/Transformers/blob/main/ViTfashion_mnist(main).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
from sklearn.metrics import classification_report
import tensorflow as tf
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model

In [12]:
class ClassToken(Layer):
    """Keras layer that prepends one learnable token to every input sequence.

    A single trainable vector (zero-initialised) is created with the same
    feature width as the input and is placed at position 0 of the sequence
    axis, so the output has one more step along axis 1 than the input.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create the token weight, sized from the last input dimension."""
        feature_dim = input_shape[-1]
        self.cls = self.add_weight(
            name="cls",
            shape=(1, 1, feature_dim),
            initializer="zeros",
            trainable=True,
        )

    def call(self, x):
        """Return `x` with the class token concatenated in front (axis 1)."""
        # The batch size is only known at run time, so read it dynamically
        # and repeat the single stored token once per example.
        num_examples = tf.shape(x)[0]
        tiled_token = tf.tile(self.cls, [num_examples, 1, 1])
        return tf.concat([tiled_token, x], axis=1)

def mlp(x, cf):
    """Feed-forward sub-block used inside each transformer encoder layer.

    Args:
        x: Input tensor; the last axis is transformed.
        cf: Config dict providing "mlp_dim" (width of the GELU layer),
            "hidden_dim" (output width) and "dropout_rate" (used after
            both dense layers).

    Returns:
        Tensor whose last dimension is cf["hidden_dim"].
    """
    # Expand with a GELU non-linearity, then regularise.
    expanded = Dense(cf["mlp_dim"], activation="gelu")(x)
    expanded = Dropout(cf["dropout_rate"])(expanded)

    # Linear projection back to the model width, then regularise again.
    projected = Dense(cf["hidden_dim"])(expanded)
    return Dropout(cf["dropout_rate"])(projected)

def transformer_encoder(x, cf):
    """Apply one transformer encoder block (self-attention, then MLP) to `x`.

    Each sub-layer is preceded by LayerNormalization and wrapped in a
    residual connection that adds back the un-normalised sub-layer input.

    Args:
        x: Sequence tensor to transform.
        cf: Config dict providing "num_heads" and "hidden_dim" for the
            attention layer, plus the keys read by `mlp`.

    Returns:
        Tensor produced by the second residual addition.
    """
    # --- Self-attention sub-layer ---
    attn_input = LayerNormalization()(x)
    # NOTE(review): key_dim is the per-head size in Keras; here every head
    # gets the full hidden_dim rather than hidden_dim // num_heads.
    attn_output = MultiHeadAttention(
        num_heads=cf["num_heads"], key_dim=cf["hidden_dim"]
    )(attn_input, attn_input)
    after_attention = Add()([attn_output, x])

    # --- Feed-forward sub-layer ---
    mlp_input = LayerNormalization()(after_attention)
    mlp_output = mlp(mlp_input, cf)
    return Add()([mlp_output, after_attention])

def ViT(cf):
    """Assemble a Vision Transformer classifier as a Keras functional model.

    The model takes already-flattened patches, projects each one to
    cf["hidden_dim"], adds a learned positional embedding, prepends a class
    token, runs cf["num_layers"] encoder blocks and classifies from the
    normalised class-token output through a softmax layer.

    Args:
        cf: Config dict providing "num_patches", "patch_size",
            "num_channels", "hidden_dim", "num_layers" and "num_classes",
            plus the keys read by `transformer_encoder`.

    Returns:
        An uncompiled `Model` mapping inputs of shape
        (num_patches, patch_size * patch_size * num_channels) to
        cf["num_classes"] class probabilities.
    """
    num_patches = cf["num_patches"]
    flat_patch_dim = cf["patch_size"] * cf["patch_size"] * cf["num_channels"]
    inputs = Input((num_patches, flat_patch_dim))

    # Linear projection of every flattened patch into the model width.
    patch_embed = Dense(cf["hidden_dim"])(inputs)

    # One learned embedding vector per patch position.
    position_ids = tf.range(start=0, limit=num_patches, delta=1)
    pos_embed = Embedding(
        input_dim=num_patches, output_dim=cf["hidden_dim"]
    )(position_ids)

    # Positions are added before the class token is prepended, so the class
    # token itself carries no positional embedding.
    tokens = ClassToken()(patch_embed + pos_embed)

    for _ in range(cf["num_layers"]):
        tokens = transformer_encoder(tokens, cf)

    # Classify from the final representation at sequence position 0
    # (the class token).
    tokens = LayerNormalization()(tokens)
    cls_output = tokens[:, 0, :]
    probabilities = Dense(cf["num_classes"], activation="softmax")(cls_output)

    return Model(inputs, probabilities)



In [13]:
def preprocess_image(image, patch_size):
    """Turn a single 28x28 grayscale image into a batch of flattened patches.

    The image is scaled by 1/255.0, the same factor the training script
    applies to its data, so inference inputs match what the model was
    trained on. (The previous divisor of 255.05 was a typo that scaled
    inputs slightly differently from the training set.)

    Args:
        image: Array or tensor with exactly 28 * 28 elements; it is reshaped
            to (28, 28, 1). Assumed to hold raw pixel intensities in
            [0, 255] -- TODO confirm against callers.
        patch_size: Side length of the square, non-overlapping patches.

    Returns:
        float32 tensor of shape (1, num_patches, patch_size * patch_size).
    """
    image = tf.reshape(image, (28, 28, 1))
    # Size-preserving resize; kept from the original pipeline (it also
    # yields a float tensor).
    image = tf.image.resize(image, (28, 28))
    image = tf.expand_dims(image, axis=0)  # add the batch axis
    image = tf.cast(image, tf.float32) / 255.0
    patches = tf.image.extract_patches(
        images=image,
        sizes=[1, patch_size, patch_size, 1],
        strides=[1, patch_size, patch_size, 1],
        rates=[1, 1, 1, 1],
        padding='VALID'
    )
    # Collapse the 2-D grid of patches into one sequence axis.
    patches = tf.reshape(patches, (patches.shape[0], -1, patches.shape[-1]))
    return patches

def _extract_flat_patches(images, patch_size, num_patches):
    """Split a batch of single-channel images into flattened square patches.

    Args:
        images: Batch of images shaped (N, H, W); a channel axis is appended
            before patch extraction.
        patch_size: Side length of the non-overlapping patches.
        num_patches: Number of patches produced per image.

    Returns:
        Tensor of shape (N, num_patches, patch_size * patch_size).
    """
    patches = tf.image.extract_patches(
        images=tf.expand_dims(images, axis=-1),
        sizes=[1, patch_size, patch_size, 1],
        strides=[1, patch_size, patch_size, 1],
        rates=[1, 1, 1, 1],
        padding='VALID'
    )
    return tf.reshape(patches, (patches.shape[0], num_patches, -1))


if __name__ == "__main__":
    (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

    # Work on a small slice of the dataset to keep training quick.
    subset_size = 5000
    test_subset_size = 1000
    x_train, y_train = x_train[:subset_size], y_train[:subset_size]
    x_test, y_test = x_test[:test_subset_size], y_test[:test_subset_size]

    # Scale pixel intensities to [0, 1].
    x_train = x_train.astype('float32') / 255.0
    x_test = x_test.astype('float32') / 255.0

    # 28x28 images cut into 14x14 tiles -> a 2x2 grid, i.e. 4 patches each.
    patch_size = 14
    num_patches = (28 // patch_size) ** 2

    # Same patching for both splits via the shared helper.
    x_train = _extract_flat_patches(x_train, patch_size, num_patches)
    x_test = _extract_flat_patches(x_test, patch_size, num_patches)

    config = {
        "num_layers": 4,
        "hidden_dim": 128,
        "mlp_dim": 256,
        "num_heads": 4,
        "dropout_rate": 0.1,
        "num_patches": num_patches,
        "patch_size": patch_size,
        "num_channels": 1,
        "num_classes": 10
    }

    model = ViT(config)
    # Labels are integer class ids, hence the sparse loss.
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    # NOTE(review): the test split doubles as validation data here, so the
    # report printed below is not computed on data unseen during monitoring.
    model.fit(x_train, y_train, epochs=10, batch_size=32, validation_data=(x_test, y_test))

    model.save("vit_fashion_mnist_cls.h5")

    print("Model trained. Ready to classify a new image.")

    # Convert per-class probabilities to predicted class ids.
    y_pred = model.predict(x_test)
    y_pred_classes = tf.argmax(y_pred, axis=1).numpy()

    print("Evaluation Metrics on Test Set:")
    print(classification_report(y_test, y_pred_classes))


Epoch 1/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 84ms/step - accuracy: 0.4804 - loss: 1.5374 - val_accuracy: 0.6440 - val_loss: 0.8814
Epoch 2/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - accuracy: 0.7485 - loss: 0.6811 - val_accuracy: 0.7190 - val_loss: 0.8257
Epoch 3/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.7789 - loss: 0.6036 - val_accuracy: 0.7890 - val_loss: 0.5755
Epoch 4/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - accuracy: 0.7922 - loss: 0.5604 - val_accuracy: 0.7850 - val_loss: 0.6123
Epoch 5/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - accuracy: 0.8045 - loss: 0.5377 - val_accuracy: 0.8120 - val_loss: 0.5362
Epoch 6/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - accuracy: 0.8051 - loss: 0.5116 - val_accuracy: 0.8050 - val_loss: 0.5386
Epoch 7/10
[1m157/15



Model trained. Ready to classify a new image.
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 39ms/step
Evaluation Metrics on Test Set:
              precision    recall  f1-score   support

           0       0.79      0.53      0.64       107
           1       0.96      0.93      0.95       105
           2       0.81      0.63      0.71       111
           3       0.80      0.78      0.79        93
           4       0.77      0.71      0.74       115
           5       0.84      0.93      0.88        87
           6       0.40      0.73      0.52        97
           7       0.93      0.82      0.87        95
           8       0.97      0.92      0.94        95
           9       0.93      0.93      0.93        95

    accuracy                           0.79      1000
   macro avg       0.82      0.79      0.80      1000
weighted avg       0.82      0.79      0.79      1000



In [4]:
    # Classify one sample from the test split. Relies on `model` and the
    # patch-encoded `x_test` left in the kernel by the training cell.
    new_image = x_test[146]
    # predict() expects a batch, so add a leading axis of size 1.
    single_item_batch = tf.expand_dims(new_image, axis=0)

    prediction = model.predict(single_item_batch)
    predicted_class = tf.argmax(prediction, axis=1).numpy()[0]
    print(f"Predicted Class: {predicted_class}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
Predicted Class: 1


In [46]:
import tensorflow as tf

# Print the first 200 test-set labels next to their indices.
# Relies on `y_test` already being defined by the training cell; on a fresh
# kernel it can be reloaded first with:
#   (x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
# The same loop works for the training labels by reading `y_train` instead.
for idx in range(200):
    label = y_test[idx]
    print(f"Index: {idx}, Output Value (Label): {label}")

Index: 0, Output Value (Label): 9
Index: 1, Output Value (Label): 2
Index: 2, Output Value (Label): 1
Index: 3, Output Value (Label): 1
Index: 4, Output Value (Label): 6
Index: 5, Output Value (Label): 1
Index: 6, Output Value (Label): 4
Index: 7, Output Value (Label): 6
Index: 8, Output Value (Label): 5
Index: 9, Output Value (Label): 7
Index: 10, Output Value (Label): 4
Index: 11, Output Value (Label): 5
Index: 12, Output Value (Label): 7
Index: 13, Output Value (Label): 3
Index: 14, Output Value (Label): 4
Index: 15, Output Value (Label): 1
Index: 16, Output Value (Label): 2
Index: 17, Output Value (Label): 4
Index: 18, Output Value (Label): 8
Index: 19, Output Value (Label): 0
Index: 20, Output Value (Label): 2
Index: 21, Output Value (Label): 5
Index: 22, Output Value (Label): 7
Index: 23, Output Value (Label): 9
Index: 24, Output Value (Label): 1
Index: 25, Output Value (Label): 4
Index: 26, Output Value (Label): 6
Index: 27, Output Value (Label): 0
Index: 28, Output Value (Label