In [1]:
!pip install vit_keras
!pip install tensorflow_addons

Collecting vit_keras
  Downloading vit_keras-0.1.2-py3-none-any.whl (24 kB)
Collecting validators (from vit_keras)
  Downloading validators-0.28.1-py3-none-any.whl (39 kB)
Installing collected packages: validators, vit_keras
Successfully installed validators-0.28.1 vit_keras-0.1.2
Collecting tensorflow_addons
  Downloading tensorflow_addons-0.23.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (611 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m611.8/611.8 kB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
Collecting typeguard<3.0.0,>=2.7 (from tensorflow_addons)
  Downloading typeguard-2.13.3-py3-none-any.whl (17 kB)
Installing collected packages: typeguard, tensorflow_addons
Successfully installed tensorflow_addons-0.23.0 typeguard-2.13.3


In [2]:
# Mount Google Drive at /content/drive so the dataset path below resolves.
from google.colab import drive
drive.mount('/content/drive')
# Alternative dataset (disabled): 836 stacks of images, Lahore only.
# filename = '/content/drive/My Drive/C2D2 Datasets/dataset_3d_new.h5'
# Active dataset: 1996 stacks of images of Lahore, Aleppo, and Kathmandu
# (the entire C2D2 dataset).
filename = '/content/drive/My Drive/C2D2 Datasets/dataset-001.h5'

import numpy as np
import h5py
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Dense, Dropout, concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
from vit_keras import vit
from tensorflow.keras.callbacks import EarlyStopping

# Fail fast when TensorFlow does not report the first GPU; otherwise announce it.
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
    print(f'Found GPU at: {device_name}')
else:
    raise SystemError('GPU device not found')

Mounted at /content/drive



TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



Found GPU at: /device:GPU:0


In [8]:
# Function to load and prepare the data
def load_and_split_data(filepath, num_samples=None, test_size=0.2, val_size=0.1):
    """Load image stacks from an HDF5 file and split them into train/val/test pairs.

    The file must contain the datasets ``data_x`` (image stacks) and ``data_y``
    (labels). ``data_x`` is indexed as a 5-D array: sample, position within the
    stack, then three image axes. Only positions 0 and 2 of each stack are kept
    and returned as an image pair.

    Args:
        filepath: Path to the HDF5 file.
        num_samples: If given, only the first ``num_samples`` stacks are read
            from the file; ``None`` reads everything.
        test_size: Fraction of the loaded stacks held out for testing.
        val_size: Fraction of the loaded stacks (relative to the whole set, not
            to the remaining training portion) held out for validation.

    Returns:
        Tuple ``(images_train, images_val, images_test, labels_train,
        labels_val, labels_test)``. Each ``images_*`` array has the two selected
        images stacked on axis 1; each ``labels_*`` array is one-hot encoded
        with 4 classes.
    """
    # Slice the HDF5 datasets directly so that a small `num_samples` run only
    # reads (and later normalizes) the stacks it actually needs, and release
    # the file handle as soon as the arrays are in memory.
    with h5py.File(filepath, 'r') as h5file:
        if num_samples is None:
            data_x = h5file['data_x'][:]
            data_y = h5file['data_y'][:]
        else:
            data_x = h5file['data_x'][:num_samples]
            data_y = h5file['data_y'][:num_samples]

    # Normalization (assumes pixel values are stored in the 0-255 range -- TODO confirm)
    data_x = data_x.astype('float32') / 255.0

    # Splitting the original stacks into training, validation, and testing.
    # Splitting happens per stack, before pairs are built.
    train_x, test_x, train_y, test_y = train_test_split(
        data_x, data_y, test_size=test_size, random_state=42
    )
    # `val_size` is expressed relative to the full dataset, so rescale it to the
    # portion that is left after the test split has been removed.
    train_x, val_x, train_y, val_y = train_test_split(
        train_x, train_y, test_size=val_size / (1 - test_size), random_state=42
    )

    # This function creates image pairs using only the first and third images
    # (other versions of the file include making pairs of first and second,
    # second and third, and first and third image)
    def create_pairs(x, y):
        images_first_third = np.stack((x[:, 0, :, :, :], x[:, 2, :, :, :]), axis=1)

        # Directly return pairs and labels
        return images_first_third, y

    images_train, labels_train = create_pairs(train_x, train_y)
    images_val, labels_val = create_pairs(val_x, val_y)
    images_test, labels_test = create_pairs(test_x, test_y)

    labels_train = to_categorical(labels_train, num_classes=4)
    labels_val = to_categorical(labels_val, num_classes=4)
    labels_test = to_categorical(labels_test, num_classes=4)

    return images_train, images_val, images_test, labels_train, labels_val, labels_test

def create_siamese_vit_network(input_shape):
    """Build and compile a two-input classifier that shares one ViT encoder.

    Args:
        input_shape: Sequence describing one image pair, e.g. ``(2, 256, 256, 3)``.
            Element 1 is forwarded as the encoder's ``image_size`` and the
            elements from index 1 onward form the shape of each single input.

    Returns:
        A compiled Keras ``Model`` that takes two image tensors and outputs a
        4-way softmax, trained with categorical cross-entropy and tracking
        accuracy, precision and recall.
    """
    single_image_shape = input_shape[1:]
    shared_encoder = create_vit_base_network(image_size=input_shape[1])

    # Two separate inputs, both passed through the same encoder instance.
    branch_inputs = [Input(shape=single_image_shape) for _ in range(2)]
    branch_features = [shared_encoder(branch) for branch in branch_inputs]

    # Classification head on top of the concatenated branch embeddings.
    merged = concatenate(branch_features, axis=-1)
    merged = Dropout(0.1)(merged)
    hidden = Dense(128, activation='relu')(merged)
    class_probabilities = Dense(4, activation='softmax')(hidden)

    siamese_model = Model(inputs=branch_inputs, outputs=class_probabilities)
    siamese_model.compile(
        optimizer=Adam(learning_rate=0.00001),
        loss='categorical_crossentropy',
        metrics=['accuracy', 'Precision', 'Recall'],
    )
    return siamese_model

# Creating the Vision Transformer base network: vit_b16 trained on ImageNet
def create_vit_base_network(image_size):
    """Return a ``vit_keras`` ViT-B/16 model without its classification top.

    Args:
        image_size: Value forwarded unchanged as ``image_size`` to
            ``vit.vit_b16``.

    Returns:
        The model object produced by ``vit.vit_b16`` with pretrained weights
        requested and ``include_top`` / ``pretrained_top`` both disabled.
    """
    backbone_options = {
        'image_size': image_size,
        'activation': 'softmax',
        'pretrained': True,
        'include_top': False,
        'pretrained_top': False,
    }
    return vit.vit_b16(**backbone_options)

# Load the dataset and build the train / validation / test image pairs.
(images_train, images_val, images_test,
 labels_train, labels_val, labels_test) = load_and_split_data(filename)

# Input shape is (images per pair, height, width, channels).
model = create_siamese_vit_network(input_shape=(2, 256, 256, 3))

# Use early stopping (note to self: remember to also uncomment the callbacks line in the fit call below)
# early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Split every pair into the two tensors expected by the two model inputs.
train_inputs = [images_train[:, 0], images_train[:, 1]]
val_inputs = [images_val[:, 0], images_val[:, 1]]

history = model.fit(
    train_inputs,
    labels_train,
    validation_data=(val_inputs, labels_val),
    batch_size=10,
    epochs=10,
    # callbacks=[early_stopping]
)



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [9]:
# Score the trained model on the held-out test pairs and report each metric.
test_inputs = [images_test[:, 0], images_test[:, 1]]
test_metrics = model.evaluate(test_inputs, labels_test)

# The order follows the compile step: loss first, then accuracy, precision, recall.
for position, metric_label in enumerate(("Loss", "Accuracy", "Precision", "Recall")):
    print(f"Test {metric_label}: {test_metrics[position]}")


Test Loss: 0.8182976245880127
Test Accuracy: 0.7225000262260437
Test Precision: 0.7329843044281006
Test Recall: 0.699999988079071
