In [1]:
!pip install split_folders

Collecting split_folders
  Downloading split_folders-0.5.1-py3-none-any.whl.metadata (6.2 kB)
Downloading split_folders-0.5.1-py3-none-any.whl (8.4 kB)
Installing collected packages: split_folders
Successfully installed split_folders-0.5.1


In [2]:
import os
import splitfolders
import joblib
import tensorflow as tf
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras import layers, models
from tensorflow.keras.layers import Rescaling, RandomFlip, RandomRotation, RandomZoom

def preprocess_data():
    """Build the image preprocessing pipeline.

    Returns:
        A ``tf.keras.Sequential`` containing a single ``Rescaling(1/255)``
        layer, i.e. it multiplies every input value by 1/255.
    """
    pipeline_layers = [Rescaling(1./255)]
    return tf.keras.Sequential(pipeline_layers)

# Shared rescaling pipeline applied to every split.
preprocessing_function = preprocess_data()

loc = "/kaggle/input/autism-image-data/AutismDataset/consolidated"

# Create the output tree up front; exist_ok keeps the cell re-runnable.
for split_path in ('output', 'output/train', 'output/val', 'output/test'):
    os.makedirs(split_path, exist_ok=True)

# Copy the class folders under `loc` into output/{train,val,test} using an
# 80/10/10 ratio; the fixed seed makes the split repeatable.
splitfolders.ratio(loc, output="output", seed=42, ratio=(0.80, 0.1, 0.1))

train_dir = "output/train"
test_dir = "output/test"
val_dir = "output/val"


def load_split(directory, shuffle, batch_size=32, image_size=(224, 224), seed=42):
    """Load one split directory as a batched, rescaled ``tf.data.Dataset``.

    Args:
        directory: Folder with one sub-folder per class.
        shuffle: Whether ``image_dataset_from_directory`` shuffles the files.
        batch_size: Number of images per batch.
        image_size: ``(height, width)`` every image is resized to.
        seed: Seed forwarded to ``image_dataset_from_directory``.

    Returns:
        A dataset yielding ``(images, one_hot_labels)`` batches, with
        ``preprocessing_function`` applied to the images.
    """
    dataset = image_dataset_from_directory(
        directory,
        batch_size=batch_size,
        image_size=image_size,
        label_mode='categorical',
        shuffle=shuffle,
        seed=seed,
    )
    # Rescale in parallel and prefetch so input preparation overlaps training.
    return dataset.map(
        lambda images, labels: (preprocessing_function(images), labels),
        num_parallel_calls=tf.data.AUTOTUNE,
    ).prefetch(tf.data.AUTOTUNE)


# Only the training split is shuffled; val/test keep a fixed order.
train_data = load_split(train_dir, shuffle=True)
test_data = load_split(test_dir, shuffle=False)
val_data = load_split(val_dir, shuffle=False)

Copying files: 2940 files [00:30, 97.86 files/s] 


Found 2352 files belonging to 2 classes.
Found 294 files belonging to 2 classes.
Found 294 files belonging to 2 classes.


In [3]:
import tensorflow as tf
from tensorflow.keras import layers, models

# --- Input geometry -------------------------------------------------------
IMG_SIZE = 224            # side length of the square input image, in pixels
PATCH_SIZE = 8            # side length of each square patch, in pixels
# Patches per side, squared: the number of tokens fed to the transformer.
NUM_PATCHES = (IMG_SIZE // PATCH_SIZE) * (IMG_SIZE // PATCH_SIZE)

# --- Transformer encoder --------------------------------------------------
PROJECTION_DIM = 128      # width of each patch embedding
NUM_HEADS = 8
TRANSFORMER_LAYERS = 8
# The last width must equal PROJECTION_DIM: the MLP output is added back to
# the residual stream inside every encoder block.
MLP_UNITS = [256, 128]
DROPOUT_RATE = 0.1

# --- Classification head --------------------------------------------------
NUM_CLASSES = 2

class PatchEmbedding(layers.Layer):
    """Split images into non-overlapping square patches and project each one.

    Args:
        patch_size: Side length, in pixels, of each square patch.
        projection_dim: Output width of the Dense projection applied to every
            flattened patch.
        **kwargs: Standard ``Layer`` arguments (``name``, ``trainable``,
            ``dtype``). Accepting them is required for the layer to be
            re-created from a saved config.

    Note:
        To load a saved model that contains this layer, pass
        ``custom_objects={"PatchEmbedding": PatchEmbedding}`` to
        ``tf.keras.models.load_model``.
    """

    def __init__(self, patch_size, projection_dim, **kwargs):
        super().__init__(**kwargs)
        self.patch_size = patch_size
        self.projection_dim = projection_dim
        self.projection = layers.Dense(units=projection_dim)

    def call(self, images):
        """Return projected patches of shape (batch, num_patches, projection_dim).

        Args:
            images: 4-D image batch ``(batch, height, width, channels)``.
        """
        batch_size = tf.shape(images)[0]
        # Stride == patch size, so patches tile the image without overlap;
        # "VALID" drops any partial patch at the right/bottom edge.
        patches = tf.image.extract_patches(
            images=images,
            sizes=[1, self.patch_size, self.patch_size, 1],
            strides=[1, self.patch_size, self.patch_size, 1],
            rates=[1, 1, 1, 1],
            padding="VALID",
        )
        # Collapse the 2-D patch grid into one sequence axis; the last axis
        # holds the flattened pixels of a single patch.
        patches = tf.reshape(patches, [batch_size, -1, patches.shape[-1]])
        return self.projection(patches)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super().get_config()
        config.update(
            {
                "patch_size": self.patch_size,
                "projection_dim": self.projection_dim,
            }
        )
        return config

def mlp(x, hidden_units, dropout_rate):
    """Stack Dense(GELU) + Dropout pairs on top of ``x``.

    Args:
        x: Input tensor.
        hidden_units: Iterable of Dense widths, applied in order.
        dropout_rate: Rate used by the Dropout following each Dense layer.

    Returns:
        The output of the last Dropout, or ``x`` unchanged when
        ``hidden_units`` is empty.
    """
    output = x
    for width in hidden_units:
        hidden = layers.Dense(width, activation=tf.nn.gelu)(output)
        output = layers.Dropout(dropout_rate)(hidden)
    return output

class PatchPositionEmbedding(layers.Layer):
    """Add a learned position embedding to a sequence of patch embeddings.

    Keeping the ``Embedding`` inside a layer makes its weights part of the
    model, so they are updated during training and written out on save.

    Args:
        num_patches: Number of positions (sequence length) to embed.
        projection_dim: Width of each position vector; must match the width
            of the incoming patch embeddings.
        **kwargs: Standard ``Layer`` arguments (``name``, ``trainable``,
            ``dtype``).

    Note:
        To load a saved model that contains this layer, pass it via
        ``custom_objects`` to ``tf.keras.models.load_model``.
    """

    def __init__(self, num_patches, projection_dim, **kwargs):
        super().__init__(**kwargs)
        self.num_patches = num_patches
        self.projection_dim = projection_dim
        self.position_embedding = layers.Embedding(
            input_dim=num_patches, output_dim=projection_dim
        )

    def call(self, patch_embeddings):
        """Return ``patch_embeddings`` plus one learned vector per position."""
        positions = tf.range(start=0, limit=self.num_patches, delta=1)
        # (num_patches, projection_dim) broadcasts over the batch axis.
        return patch_embeddings + self.position_embedding(positions)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super().get_config()
        config.update(
            {
                "num_patches": self.num_patches,
                "projection_dim": self.projection_dim,
            }
        )
        return config


def build_vit_model():
    """Build the Vision Transformer classifier.

    Pipeline: patch embedding -> learned position embedding ->
    ``TRANSFORMER_LAYERS`` pre-norm encoder blocks (self-attention + MLP, each
    with a residual connection) -> layer norm -> average pooling over patches
    -> MLP head -> softmax over ``NUM_CLASSES``.

    Returns:
        An uncompiled ``tf.keras.Model`` mapping
        ``(IMG_SIZE, IMG_SIZE, 3)`` images to class probabilities.
    """
    inputs = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))

    patches = PatchEmbedding(PATCH_SIZE, PROJECTION_DIM)(inputs)

    # The position embedding must run inside the functional graph; calling an
    # Embedding eagerly here would bake in an untrained constant.
    encoded_patches = PatchPositionEmbedding(NUM_PATCHES, PROJECTION_DIM)(patches)

    for _ in range(TRANSFORMER_LAYERS):
        # Self-attention sub-block with residual connection.
        x1 = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
        attention_output = layers.MultiHeadAttention(
            num_heads=NUM_HEADS, key_dim=PROJECTION_DIM
        )(x1, x1)
        x2 = layers.Add()([attention_output, encoded_patches])
        # Feed-forward sub-block with residual connection; the last MLP width
        # has to equal PROJECTION_DIM for the Add to be valid.
        x3 = layers.LayerNormalization(epsilon=1e-6)(x2)
        x3 = mlp(x3, hidden_units=MLP_UNITS, dropout_rate=DROPOUT_RATE)
        encoded_patches = layers.Add()([x3, x2])

    representation = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
    # Average over the patch axis to get one vector per image.
    representation = layers.GlobalAveragePooling1D()(representation)

    features = mlp(representation, hidden_units=MLP_UNITS, dropout_rate=DROPOUT_RATE)

    # Softmax output: these are class probabilities, matching the
    # 'categorical_crossentropy' loss used at compile time.
    probabilities = layers.Dense(NUM_CLASSES, activation='softmax')(features)

    model = models.Model(inputs=inputs, outputs=probabilities)
    return model

# Instantiate the Vision Transformer.
vit_model = build_vit_model()

# Adam with a fixed learning rate; categorical cross-entropy pairs with the
# one-hot labels and the softmax output of the model.
adam = tf.keras.optimizers.Adam(learning_rate=3e-4)
vit_model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])

vit_model.summary()

# Train for 20 epochs, reporting validation metrics after each one.
history = vit_model.fit(train_data, validation_data=val_data, epochs=20)

# Final check on the held-out test split.
test_loss, test_accuracy = vit_model.evaluate(test_data)
print(f"Test accuracy: {test_accuracy}")

# Persist the trained model in the native Keras format.
vit_model.save("vit_model.keras")
print("Model saved successfully.")


Epoch 1/20


I0000 00:00:1729077547.150508      77 service.cc:145] XLA service 0x79c6c4005e40 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1729077547.150578      77 service.cc:153]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0

I0000 00:00:1729077590.502744      77 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m148s[0m 1s/step - accuracy: 0.5204 - loss: 0.7400 - val_accuracy: 0.5000 - val_loss: 0.7187
Epoch 2/20
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 545ms/step - accuracy: 0.5494 - loss: 0.6899 - val_accuracy: 0.5000 - val_loss: 0.7006
Epoch 3/20
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 545ms/step - accuracy: 0.6042 - loss: 0.6678 - val_accuracy: 0.5918 - val_loss: 0.6461
Epoch 4/20
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 545ms/step - accuracy: 0.6388 - loss: 0.6401 - val_accuracy: 0.5952 - val_loss: 0.6557
Epoch 5/20
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 545ms/step - accuracy: 0.6672 - loss: 0.6175 - val_accuracy: 0.6497 - val_loss: 0.6316
Epoch 6/20
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 545ms/step - accuracy: 0.6480 - loss: 0.6276 - val_accuracy: 0.6259 - val_loss: 0.6327
Epoch 7/20
[1m74/74[0m [32m━━━━━