In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
import pandas as pd  
import shutil

2024-11-10 16:15:50.171073: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-11-10 16:15:50.561633: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2024-11-10 16:15:50.561649: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2024-11-10 16:15:51.607360: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2024-

In [3]:
base_dir = r'/home/prashantb/Documents/Prashant/Thesis/FinalData'

train_dir = os.path.join(base_dir, 'train')
test_dir = os.path.join(base_dir, 'test')

In [4]:
# Data Augmentation for training
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=0.2  # 20% of training data used as validation data
)

In [5]:
# Data generator for training data
train_generator = train_datagen.flow_from_directory(
    '/home/prashantb/Documents/Prashant/Thesis/FinalData/train',
    target_size=(128, 128),
    batch_size=32,
    class_mode='binary',
    subset='training'  # Set as training data
)

Found 13964 images belonging to 2 classes.


In [6]:
# Data generator for validation data
validation_generator = train_datagen.flow_from_directory(
    '/home/prashantb/Documents/Prashant/Thesis/FinalData/train',
    target_size=(128, 128),
    batch_size=16,
    class_mode='binary',
    subset='validation'  # Set as validation data
)

Found 3490 images belonging to 2 classes.


In [7]:
# Data generator for test data (no augmentation)
test_datagen = ImageDataGenerator(rescale=1./255)

test_generator = test_datagen.flow_from_directory(
    '/home/prashantb/Documents/Prashant/Thesis/FinalData/test',
    target_size=(128, 128),
    batch_size=16,
    class_mode='binary'
)

Found 2096 images belonging to 2 classes.


In [9]:
 #Define the simplified CNN model
def create_cnn_model():
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3), kernel_regularizer=tf.keras.regularizers.l2(0.001)),
        BatchNormalization(),
        MaxPooling2D((2, 2)),

        Conv2D(64, (3, 3), activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001)),
        BatchNormalization(),
        MaxPooling2D((2, 2)),

        Conv2D(128, (3, 3), activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001)),
        BatchNormalization(),
        MaxPooling2D((2, 2)),

        # Global Average Pooling layer instead of Flattening
        GlobalAveragePooling2D(),

        # Fully connected dense layer
        Dense(256, activation='relu'),
        Dropout(0.3),  # Reduced Dropout rate

        # Output layer with sigmoid for binary classification
        Dense(1, activation='sigmoid')
    ])
    return model

In [11]:
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, Dense, Dropout, BatchNormalization, Input, Concatenate


In [12]:
# Vision Transformer parameters
num_heads = 8
num_layers = 6
mlp_dim = 2048
hidden_dim = 512
patch_size = 16
num_patches = (128 // patch_size) ** 2  # (128 / 16)^2 = 64 patches
dropout_rate = 0.1

# Patch and Position Embedding Layer
class PatchEmbedding(layers.Layer):
    def __init__(self, num_patches, projection_dim):
        super(PatchEmbedding, self).__init__()
        self.num_patches = num_patches
        self.projection = layers.Dense(units=projection_dim)
        self.position_embedding = layers.Embedding(
            input_dim=num_patches, output_dim=projection_dim
        )

    def call(self, patch):
        positions = tf.range(start=0, limit=self.num_patches, delta=1)
        encoded = self.projection(patch) + self.position_embedding(positions)
        return encoded

# Transformer Block
def transformer_block(inputs, num_heads, mlp_dim, dropout_rate):
    x1 = layers.LayerNormalization(epsilon=1e-6)(inputs)
    attention_output = layers.MultiHeadAttention(
        num_heads=num_heads, key_dim=hidden_dim, dropout=dropout_rate
    )(x1, x1)
    x2 = layers.Add()([attention_output, inputs])

    x3 = layers.LayerNormalization(epsilon=1e-6)(x2)
    x3 = layers.Dense(mlp_dim, activation=tf.nn.gelu)(x3)
    x3 = layers.Dropout(dropout_rate)(x3)
    x3 = layers.Dense(hidden_dim)(x3)
    return layers.Add()([x3, x2])

# Vision Transformer Model
def create_vit_classifier():
    inputs = layers.Input(shape=(128, 128, 3))
    patches = layers.Conv2D(hidden_dim, kernel_size=patch_size, strides=patch_size)(inputs)
    patches = layers.Reshape((num_patches, hidden_dim))(patches)
    encoded_patches = PatchEmbedding(num_patches, hidden_dim)(patches)

    for _ in range(num_layers):
        encoded_patches = transformer_block(encoded_patches, num_heads, mlp_dim, dropout_rate)

    representation = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
    representation = layers.Flatten()(representation)
    representation = layers.Dropout(0.5)(representation)
    features = layers.Dense(mlp_dim, activation=tf.nn.gelu)(representation)
    logits = layers.Dense(1)(features)
    outputs = layers.Activation("sigmoid")(logits)
    return tf.keras.Model(inputs=inputs, outputs=outputs)


In [13]:
# Function to remove the last layer and get the feature extraction part of the model
def get_feature_extractor(model):
    model = Model(inputs=model.input, outputs=model.layers[-2].output)
    return model

# Define the hybrid model
def create_hybrid_model(cnn_model, vit_model, input_shape):
    # Create input layer
    input_layer = Input(shape=input_shape)
    
    # Get the CNN and ViT feature extraction parts
    cnn_features = cnn_model(input_layer)
    vit_features = vit_model(input_layer)
    
    # Concatenate the features
    combined_features = Concatenate()([cnn_features, vit_features])
    
    # Add a few dense layers for final classification
    x = Dense(512, activation='relu')(combined_features)
    x = Dropout(0.5)(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)
    output_layer = Dense(1, activation='sigmoid')(x)
    
    # Create the hybrid model
    hybrid_model = Model(inputs=input_layer, outputs=output_layer)
    return hybrid_model

# Create the individual models
cnn_model = create_cnn_model()
vit_model = create_vit_classifier()

2024-11-10 16:19:45.249499: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2024-11-10 16:19:45.249849: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)
2024-11-10 16:19:45.249890: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (prashantB-viveka): /proc/driver/nvidia/version does not exist
2024-11-10 16:19:45.250934: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-11-10 16:19:46.086796: W tensorflow/tsl/framework/cpu_allocato

In [17]:
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report


In [18]:
# Create the feature extraction models
cnn_feature_extractor = get_feature_extractor(cnn_model)
vit_feature_extractor = get_feature_extractor(vit_model)

# Input shape for the models
input_shape = (128, 128, 3)

# Create the hybrid model
hybrid_model = create_hybrid_model(cnn_feature_extractor, vit_feature_extractor, input_shape)




In [19]:
# Compile the hybrid model with additional metrics
hybrid_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()]
)

# Print the hybrid model summary
hybrid_model.summary()

# Assuming you have your data generators ready
# Replace `train_generator` and `validation_generator` with your actual generators
batch_size = 32

train_datagen = ImageDataGenerator(
    rescale=1.0/255,
    validation_split=0.2  # Using 20% of data for validation
)


# Evaluate the hybrid model
loss, accuracy, precision, recall = hybrid_model.evaluate(validation_generator)
print(f'Validation Accuracy: {accuracy:.4f}')
print(f'Validation Precision: {precision:.4f}')
print(f'Validation Recall: {recall:.4f}')

# Calculate additional metrics
y_true = validation_generator.classes
y_pred = hybrid_model.predict(validation_generator)
y_pred = (y_pred > 0.5).astype(int).reshape(-1)

# Calculate F1-score
f1 = f1_score(y_true, y_pred)
print(f'Validation F1-score: {f1:.4f}')


Model: "model_9"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_4 (InputLayer)           [(None, 128, 128, 3  0           []                               
                                )]                                                                
                                                                                                  
 model_7 (Functional)           (None, 256)          127168      ['input_4[0][0]']                
                                                                                                  
 model_8 (Functional)           (None, 1)            130822145   ['input_4[0][0]']                
                                                                                                  
 concatenate_2 (Concatenate)    (None, 257)          0           ['model_7[0][0]',          

In [20]:
# Classification report
report = classification_report(y_true, y_pred, target_names=['Class 0', 'Class 1'])
print(report)

              precision    recall  f1-score   support

     Class 0       0.27      0.11      0.16       996
     Class 1       0.71      0.88      0.79      2494

    accuracy                           0.66      3490
   macro avg       0.49      0.50      0.47      3490
weighted avg       0.59      0.66      0.61      3490

