<a href="https://colab.research.google.com/github/Rula-Islait/Research/blob/main/FashionMNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam

In [2]:

 (x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
x_train = x_train.reshape(-1, 28, 28, 1) / 255.0
x_test = x_test.reshape(-1, 28, 28, 1) / 255.0

In [3]:
num_classes = 10
y_train = tf.keras.utils.to_categorical(y_train, num_classes)
y_test = tf.keras.utils.to_categorical(y_test, num_classes)

In [4]:
class ReverseAttention(layers.Layer):
    def __init__(self, num_heads=8, key_dim=64):
        super(ReverseAttention, self).__init__()
        self.mha = layers.MultiHeadAttention(num_heads=num_heads, key_dim=key_dim)

    def call(self, inputs):
        attention_output = self.mha(inputs, inputs)
        return tf.sigmoid(1 - attention_output) + tf.math.tanh(attention_output)

In [5]:
class DynamicAttention(layers.Layer):
    def __init__(self, channels, num_heads=8, key_dim=64):
        super(DynamicAttention, self).__init__()
        self.channels = channels
        self.num_heads = num_heads
        self.key_dim = key_dim
        self.mha = layers.MultiHeadAttention(num_heads=self.num_heads, key_dim=self.key_dim)
        self.batch_norm = layers.BatchNormalization()
        self.dropout = layers.Dropout(0.3)

    def call(self, inputs):
        attention_output = self.mha(inputs, inputs)
        attention_output = self.batch_norm(attention_output)
        attention_output = self.dropout(attention_output)
        return inputs * attention_output + inputs

In [6]:
class SequentialAttention(layers.Layer):
    def __init__(self, channels, num_heads=8, key_dim=64):
        super(SequentialAttention, self).__init__()
        self.channels = channels
        self.num_heads = num_heads
        self.key_dim = key_dim
        self.mha = layers.MultiHeadAttention(num_heads=self.num_heads, key_dim=self.key_dim)
        self.conv1 = layers.Conv2D(self.channels, (3, 3), padding='same', activation='relu')
        self.conv2 = layers.Conv2D(self.channels, (3, 3), padding='same', activation='sigmoid')

    def call(self, inputs):
        x = self.conv1(inputs)
        attention_output = self.mha(x, x)
        attention_map = self.conv2(attention_output)
        return inputs + (inputs * attention_map)

In [7]:
class DenseAttention(tf.keras.layers.Layer):
    def __init__(self, channels):
        super(DenseAttention, self).__init__()
        self.channels = channels
        self.mha = tf.keras.layers.MultiHeadAttention(num_heads=4, key_dim=channels)
        self.pool = tf.keras.layers.GlobalAveragePooling2D()

    def build(self, input_shape):
        self.query_dim = input_shape[-1]

    def call(self, inputs):
        pooled_inputs = self.pool(inputs)
        pooled_inputs = tf.expand_dims(pooled_inputs, 1)
        attention_output = self.mha(pooled_inputs, pooled_inputs)
        return inputs * tf.expand_dims(attention_output, 1) + inputs

In [8]:
def build_model(input_shape, num_classes):
    inputs = layers.Input(shape=input_shape)
    x = inputs

    x = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((2, 2))(x)
    x = ReverseAttention(64)(x)

    x = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((2, 2))(x)
    x = DynamicAttention(128)(x)

    x = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((2, 2))(x)
    x = SequentialAttention(256)(x)

    x = layers.Conv2D(512, (3, 3), activation='relu', padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Conv2D(512, (3, 3), activation='relu', padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((2, 2))(x)
    x = DenseAttention(512)(x)

    x = layers.Flatten()(x)
    x = layers.Dense(512, activation='relu')(x)
    x = layers.Dropout(0.5)(x)
    x = layers.Dense(256, activation='relu')(x)
    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)

    model = models.Model(inputs, outputs)
    return model

In [9]:
input_shape = (28, 28, 1)
model = build_model(input_shape, num_classes)
model.summary()

In [10]:
optimizer = Adam(learning_rate=0.0001)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

In [11]:
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest'
)

In [12]:
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6)

In [None]:
history = model.fit(
    datagen.flow(x_train, y_train, batch_size=64),
    epochs=80,
    validation_data=(x_test, y_test),
    callbacks=[early_stopping, reduce_lr]
)

Epoch 1/80


  self._warn_if_super_not_called()


[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m243s[0m 222ms/step - accuracy: 0.4848 - loss: 1.5466 - val_accuracy: 0.4143 - val_loss: 1.9824 - learning_rate: 1.0000e-04
Epoch 2/80
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m189s[0m 201ms/step - accuracy: 0.7236 - loss: 0.7716 - val_accuracy: 0.7187 - val_loss: 0.6877 - learning_rate: 1.0000e-04
Epoch 3/80
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m187s[0m 200ms/step - accuracy: 0.7764 - loss: 0.6220 - val_accuracy: 0.7828 - val_loss: 0.5938 - learning_rate: 1.0000e-04
Epoch 4/80
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m187s[0m 199ms/step - accuracy: 0.8056 - loss: 0.5395 - val_accuracy: 0.8048 - val_loss: 0.5097 - learning_rate: 1.0000e-04
Epoch 5/80
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m187s[0m 199ms/step - accuracy: 0.8339 - loss: 0.4680 - val_accuracy: 0.8037 - val_loss: 0.4923 - learning_rate: 1.0000e-04
Epoch 6/80
[1m938/938[0m [32m━━━━━━━━━

In [None]:
loss, accuracy = model.evaluate(x_test, y_test)
print(f'Test loss: {loss:.4f}, Test accuracy: {accuracy:.4f}')

In [None]:
from tensorflow.keras.applications import ResNet50, VGG16, MobileNetV2
from tensorflow.keras import layers, models, optimizers

def prepare_data_for_pretrained(x_train, x_test):
    x_train_rgb = tf.image.resize(x_train, [224, 224])
    x_train_rgb = tf.image.grayscale_to_rgb(x_train_rgb)
    x_test_rgb = tf.image.resize(x_test, [224, 224])
    x_test_rgb = tf.image.grayscale_to_rgb(x_test_rgb)
    return x_train_rgb, x_test_rgb

x_train_rgb, x_test_rgb = prepare_data_for_pretrained(x_train, x_test)

def build_transfer_model(base_model, num_classes):
    base_model.trainable = False
    inputs = layers.Input(shape=(224,224,3))
    x = base_model(inputs, training=False)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(256, activation='relu')(x)
    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)
    model = models.Model(inputs, outputs)
    return model

resnet_model = build_transfer_model(ResNet50(include_top=False, weights='imagenet', input_shape=(224,224,3)), num_classes)
mobilenet_model = build_transfer_model(MobileNetV2(include_top=False, weights='imagenet', input_shape=(224,224,3)), num_classes)
vgg_model = build_transfer_model(VGG16(include_top=False, weights='imagenet', input_shape=(224,224,3)), num_classes)

optimizer = optimizers.Adam(learning_rate=0.0001)
for model in [resnet_model, mobilenet_model, vgg_model]:
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])


In [18]:
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.applications import ResNet50, VGG16, MobileNetV2

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
x_train = x_train.reshape(-1, 28, 28, 1) / 255.0
x_test = x_test.reshape(-1, 28, 28, 1) / 255.0

num_classes = 10

y_train_int = y_train
y_test_int = y_test

def prepare_dataset(model_type='attention', img_size=(160,160), batch_size=32):
    def preprocess(image, label):
        if model_type != 'attention':  # تحويل grayscale -> RGB للـ pretrained models
            image = tf.image.resize(image, img_size)
            image = tf.image.grayscale_to_rgb(image)
        else:
            image = tf.cast(image, tf.float32)
        return image, label

    train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train_int))
    train_ds = train_ds.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    train_ds = train_ds.shuffle(1000).batch(batch_size).prefetch(tf.data.AUTOTUNE)

    test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test_int))
    test_ds = test_ds.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    test_ds = test_ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)

    return train_ds, test_ds

def build_attention_model(input_shape=(28,28,1), num_classes=10):
    inputs = layers.Input(shape=input_shape)
    x = layers.Conv2D(32, (3,3), activation='relu', padding='same')(inputs)
    x = layers.MaxPooling2D((2,2))(x)
    x = layers.Conv2D(64, (3,3), activation='relu', padding='same')(x)
    x = layers.MaxPooling2D((2,2))(x)
    x = layers.Flatten()(x)
    x = layers.Dense(128, activation='relu')(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)
    return models.Model(inputs, outputs)

# ===================== موديلات Transfer Learning =====================
def build_transfer_model(base_model, num_classes=10, input_shape=(160,160,3)):
    base_model.trainable = False
    inputs = layers.Input(shape=input_shape)
    x = base_model(inputs, training=False)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dropout(0.3)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)
    return models.Model(inputs, outputs)

# ===================== إعداد الموديلات =====================
attention_model = build_attention_model()
mobilenet_model = build_transfer_model(MobileNetV2(include_top=False, weights='imagenet'))
resnet_model = build_transfer_model(ResNet50(include_top=False, weights='imagenet'))
vgg_model = build_transfer_model(VGG16(include_top=False, weights='imagenet'))

models_dict = {
    "Attention_Model": (attention_model, 'attention'),
    "MobileNetV2": (mobilenet_model, 'pretrained'),
    "ResNet50": (resnet_model, 'pretrained'),
    "VGG16": (vgg_model, 'pretrained')
}

# ===================== Callbacks =====================
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=1e-6)

# ===================== Compile & Train كل الموديلات =====================
results = {}
for name, (model, model_type) in models_dict.items():
    print(f"\nTraining {name} ...")
    train_ds, test_ds = prepare_dataset(model_type=model_type)
    model.compile(optimizer=optimizers.Adam(1e-4), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    model.fit(train_ds, validation_data=test_ds, epochs=10, callbacks=[early_stopping, reduce_lr])
    loss, acc = model.evaluate(test_ds, verbose=0)
    results[name] = (loss, acc)

print("\n=== Comparison Results ===")
for k, v in results.items():
    print(f"{k}: Test Loss = {v[0]:.4f}, Test Accuracy = {v[1]:.4f}")


  mobilenet_model = build_transfer_model(MobileNetV2(include_top=False, weights='imagenet'))


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 0us/step

Training Attention_Model ...
Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 5ms/step - accuracy: 0.7132 - loss: 0.8739 - val_accuracy: 0.8384 - val_loss: 0.4386 - learning_rate: 1.0000e-04
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4