In [1]:
import tensorflow as tf
import tensorflow_addons as tfa
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras import layers  # Ensure this line is included

print("TensorFlow version:", tf.__version__)
print("TensorFlow Addons version:", tfa.__version__)
print("NumPy version:", np.__version__)


2024-11-15 19:59:20.127461: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-11-15 19:59:20.588026: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2024-11-15 19:59:20.588044: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2024-11-15 19:59:22.055062: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2024-

TensorFlow version: 2.11.0
TensorFlow Addons version: 0.19.0
NumPy version: 1.23.5




In [2]:
import time
import logging
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models
import os
import pandas as pd  
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

In [3]:
# Set up logging to save the output
logging.basicConfig(filename='model_comparison.log', level=logging.INFO)

# Data Augmentation for training
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=0.2  # 20% of training data used as validation data
)

batch_size = 32

# Data generators for training and validation data
train_generator = train_datagen.flow_from_directory(
    '/home/prashantb/Documents/Prashant/Thesis/FinalData/train',
    target_size=(128, 128),
    batch_size=batch_size,
    class_mode='binary',
    subset='training'  # Set as training data
)

validation_generator = train_datagen.flow_from_directory(
    '/home/prashantb/Documents/Prashant/Thesis/FinalData/train',
    target_size=(128, 128),
    batch_size=batch_size,
    class_mode='binary',
    subset='validation'  # Set as validation data
)

Found 13964 images belonging to 2 classes.
Found 3490 images belonging to 2 classes.


In [4]:
# Vision Transformer parameters
num_heads = 8
num_layers = 6
mlp_dim = 2048
hidden_dim = 512
patch_size = 16
num_patches = (128 // patch_size) ** 2  # (128 / 16)^2 = 64 patches
dropout_rate = 0.1

# Patch and Position Embedding Layer
class PatchEmbedding(layers.Layer):
    def __init__(self, num_patches, projection_dim):
        super(PatchEmbedding, self).__init__()
        self.num_patches = num_patches
        self.projection = layers.Dense(units=projection_dim)
        self.position_embedding = layers.Embedding(
            input_dim=num_patches, output_dim=projection_dim
        )

    def call(self, patch):
        positions = tf.range(start=0, limit=self.num_patches, delta=1)
        encoded = self.projection(patch) + self.position_embedding(positions)
        return encoded

# Transformer Block
def transformer_block(inputs, num_heads, mlp_dim, dropout_rate):
    x1 = layers.LayerNormalization(epsilon=1e-6)(inputs)
    attention_output = layers.MultiHeadAttention(
        num_heads=num_heads, key_dim=hidden_dim, dropout=dropout_rate
    )(x1, x1)
    x2 = layers.Add()([attention_output, inputs])

    x3 = layers.LayerNormalization(epsilon=1e-6)(x2)
    x3 = layers.Dense(mlp_dim, activation=tf.nn.gelu)(x3)
    x3 = layers.Dropout(dropout_rate)(x3)
    x3 = layers.Dense(hidden_dim)(x3)
    return layers.Add()([x3, x2])

# Vision Transformer Model
def create_vit_classifier():
    inputs = layers.Input(shape=(128, 128, 3))
    patches = layers.Conv2D(hidden_dim, kernel_size=patch_size, strides=patch_size)(inputs)
    patches = layers.Reshape((num_patches, hidden_dim))(patches)
    encoded_patches = PatchEmbedding(num_patches, hidden_dim)(patches)

    for _ in range(num_layers):
        encoded_patches = transformer_block(encoded_patches, num_heads, mlp_dim, dropout_rate)

    representation = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
    representation = layers.Flatten()(representation)
    representation = layers.Dropout(0.5)(representation)
    features = layers.Dense(mlp_dim, activation=tf.nn.gelu)(representation)
    logits = layers.Dense(1)(features)
    outputs = layers.Activation("sigmoid")(logits)
    return models.Model(inputs=inputs, outputs=outputs)


# Create and compile the ViT model
vit_model = create_vit_classifier()
vit_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss="binary_crossentropy",
    metrics=["accuracy"]
)


2024-11-15 19:59:32.071144: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2024-11-15 19:59:32.071166: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)
2024-11-15 19:59:32.071182: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (prashantB-viveka): /proc/driver/nvidia/version does not exist
2024-11-15 19:59:32.071776: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [5]:
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, min_lr=1e-6)

In [6]:
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, confusion_matrix, classification_report
import numpy as np

In [None]:
start_time = time.time()

history_vit = vit_model.fit(
    train_generator,
    epochs=25,
    validation_data=validation_generator,
    callbacks=[early_stopping, reduce_lr]
)

end_time = time.time()
execution_time_vit = end_time - start_time

# Log training and evaluation results
logging.info(f'ViT Model - Training History: {history_vit.history}')
logging.info(f'Execution Time for ViT Model: {execution_time_vit} seconds')

# Evaluate the model on the validation data
y_true = validation_generator.classes
y_pred = (vit_model.predict(validation_generator) > 0.5).astype("int32")

# Compute metrics
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)

# Log metrics
logging.info(f'ViT Model - Accuracy: {accuracy:.4f}')
logging.info(f'ViT Model - Precision: {precision:.4f}')
logging.info(f'ViT Model - Recall: {recall:.4f}')
logging.info(f'ViT Model - F1-score: {f1:.4f}')

# Output results to the console
print(f"ViT Model - Accuracy: {accuracy:.4f}")
print(f"ViT Model - Precision: {precision:.4f}")
print(f"ViT Model - Recall: {recall:.4f}")
print(f"ViT Model - F1-score: {f1:.4f}")
print(f"ViT Model - Execution Time: {execution_time_vit} seconds")

y_pred = (vit_model.predict(validation_generator) > 0.5).astype("int32")

# Compute metrics
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)

# Log metrics
logging.info(f'ViT Model - Accuracy: {accuracy:.4f}')
logging.info(f'ViT Model - Precision: {precision:.4f}')
logging.info(f'ViT Model - Recall: {recall:.4f}')
logging.info(f'ViT Model - F1-score: {f1:.4f}')

# Output results to the console
print(f"ViT Model - Accuracy: {accuracy:.4f}")
print(f"ViT Model - Precision: {precision:.4f}")
print(f"ViT Model - Recall: {recall:.4f}")
print(f"ViT Model - F1-score: {f1:.4f}")
print(f"ViT Model - Execution Time: {execution_time_vit} seconds")


Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
ViT Model - Accuracy: 0.5702
ViT Model - Precision: 0.7037
ViT Model - Recall: 0.6885
ViT Model - F1-score: 0.6960
ViT Model - Execution Time: 52594.05940413475 seconds
