In [1]:
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2,MobileNetV3Small,EfficientNetV2B0
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
import pandas as pd
import numpy as np
import os
import datetime
from sklearn.metrics import roc_curve, auc
import cv2
from tensorflow.keras.mixed_precision import set_global_policy, Policy

# # Enable memory growth for both T4 GPUs
# gpus = tf.config.list_physical_devices('GPU')
# print("Available GPUs:", [gpu.name for gpu in gpus])  # Should list two T4 GPUs
# for gpu in gpus:
#     tf.config.experimental.set_memory_growth(gpu, True)



# Initialize MirroredStrategy
strategy = tf.distribute.MirroredStrategy(devices=["/gpu:0", "/gpu:1"])
print("Number of GPUs in strategy:", strategy.num_replicas_in_sync)  # Should print 2

2025-07-15 02:40:13.800559: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1752547213.996967      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1752547214.056576      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Number of GPUs in strategy: 2


I0000 00:00:1752547226.742760      36 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


In [2]:
IMG_SIZE = (112, 112)
BATCH_SIZE = 128
EPOCHS = 20

data_dir = "/kaggle/input/11-785-fall-20-homework-2-part-2"
pairs_file = os.path.join(data_dir, "verification_pairs_val.txt")

In [3]:

def create_data_generators(data_dir):


    # Define data augmentation for training
    train_datagen = ImageDataGenerator(
      
        zoom_range = 0.3,
        shear_range = 0.2,        
        fill_mode="nearest",
        horizontal_flip=True
    )

    # Define validation data generator (no augmentation, only rescaling)
    val_datagen = ImageDataGenerator()

    # Classification data generator for training
    train_generator = train_datagen.flow_from_directory(
        os.path.join(data_dir, 'classification_data/train_data'),
        target_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        color_mode = 'rgb',
        class_mode='categorical',
        shuffle = True 
    )

    # Classification data generator for validation
    val_generator = val_datagen.flow_from_directory(
        os.path.join(data_dir, 'classification_data/val_data'),
        target_size=IMG_SIZE,
        color_mode = 'rgb',
        batch_size=BATCH_SIZE,
        class_mode='categorical',
        shuffle = False
    )

    

    # Return datasets and number of classes
    return train_generator, val_generator

In [4]:

train_dataset, val_dataset = create_data_generators(data_dir)



Found 380638 images belonging to 4000 classes.
Found 8000 images belonging to 4000 classes.


In [5]:
def create_verification_generator(data_dir, pairs_file):
    df = pd.read_csv(pairs_file, sep='\s+', names=['img1', 'img2', 'label'])
    
    def generator():
        for _, row in df.iterrows():
            img1 = cv2.imread(os.path.join(data_dir, row['img1']))
            img2 = cv2.imread(os.path.join(data_dir, row['img2']))
            img1 = cv2.resize(img1, IMG_SIZE) / 255.0  # Hardcode the size
            img2 = cv2.resize(img2, IMG_SIZE) / 255.0
            yield [img1, img2], row['label']
    
    # Use output_signature instead of output_types/output_shapes
    dataset = tf.data.Dataset.from_generator(
        generator,
        output_signature=(
            (tf.TensorSpec(shape=(IMG_SIZE[0], IMG_SIZE[1], 3), dtype=tf.float32),
             tf.TensorSpec(shape=(IMG_SIZE[0], IMG_SIZE[1], 3), dtype=tf.float32)),
            tf.TensorSpec(shape=(), dtype=tf.float32)
        )
    )
    
    return dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

In [6]:
verification_dataset = create_verification_generator(data_dir, pairs_file)
print(len(train_dataset.class_indices))

4000


In [7]:
def create_siamese_network(input_shape =(*IMG_SIZE, 3)):
    base_model = MobileNetV2(weights = 'imagenet', include_top = False, input_shape = input_shape)

    for layer in base_model.layers:
        layer.trainable = False
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(128, activation = None, name = 'embedding')(x)
    base_network = Model(inputs = base_model.input, outputs = x)
    input_a = Input(shape = input_shape)
    input_b = Input(shape = input_shape)

    embedding_a = base_network(input_a)
    embedding_b = base_network(input_b)

    # Calculate Euclidean distance
    distance = Lambda(lambda tensors: tf.sqrt(tf.reduce_sum(tf.square(tensors[0] - tensors[1]), axis=1, keepdims=True)))([embedding_a, embedding_b])
    
    # Create Siamese model
    siamese_model = Model(inputs=[input_a, input_b], outputs=distance)
    
    return siamese_model, base_network


def create_classification_model(num_classes, input_shape =(*IMG_SIZE, 3)):
    base_model = EfficientNetV2B0(weights = 'imagenet', include_top = False, input_shape= input_shape)
    base_model.trainable = True

    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    
    x = Dense(128)(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Dropout(0.4)(x)
    
    output = Dense(num_classes, activation = 'softmax')(x)

    model = Model(inputs = base_model.input, outputs = output)
    return model, base_model


In [8]:
def create_callbacks(model_name):
    reduce_lr = ReduceLROnPlateau(
        monitor='val_accuracy',
        factor=0.1,
        patience=4,
        min_lr=1e-6,
        verbose=1
    )
    
    early_stopping = EarlyStopping(
        monitor='val_accuracy',
        patience=5,
        restore_best_weights=True,
        verbose=1
    )
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    checkpoint = ModelCheckpoint(
        f'best_{model_name}_model_{timestamp}.keras',
        monitor='val_accuracy',
        save_best_only=True,
        mode= 'auto',
        verbose=1
    )
    
    return [reduce_lr, early_stopping, checkpoint]
    
def train_classification_model(pretrained_model_path=None, initial_epoch=0, phase=None):

    classification_model, base_model = create_classification_model(
        len(train_dataset.class_indices), (*IMG_SIZE, 3)
    )
    
    # If you have a pretrained model, load weights instead
    if pretrained_model_path:
        classification_model = load_model(pretrained_model_path)
    
    # Compile
    classification_model.compile(
        optimizer=Adam(learning_rate=0.0001),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    classification_callbacks = create_callbacks("EfficientNetV2B0_classification")
    history_phase1 = None
    if phase in [1, None]:
        # Phase 1: Frozen base model training
        history_phase1 = classification_model.fit(
            train_dataset,
            validation_data=val_dataset,
            epochs=EPOCHS,
            initial_epoch=initial_epoch,
            verbose=1,
            callbacks=classification_callbacks
        )

    # Phase 2: Fine-tuning
    for layer in classification_model.layers:
        layer.trainable = True
    print(f"Unfroze base model: {base_model.name}")
    fine_tune_epoch = initial_epoch if phase == 2 else EPOCHS

    
    
    classification_model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    # Fine-tuning phase
    history_phase2 = classification_model.fit(
        train_dataset,
        validation_data=val_dataset,
        epochs=150,
        initial_epoch=fine_tune_epoch,
        verbose=1,
        callbacks=create_callbacks("classification_finetuned")
    )

    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    print(f"Saving model at {timestamp}")
    classification_model.save(f"custom_model_{timestamp}.keras")
    df = pd.DataFrame(history_phase2.history)
    df.to_csv(f"custom_model_loss_{timestamp}.csv")

    return classification_model, history_phase1, history_phase2



In [9]:
def contrastive_loss(y_true, y_pred):
    """Simple contrastive loss for Siamese network."""
    margin = 1.0
    square_pred = tf.square(y_pred)
    margin_square = tf.square(tf.maximum(margin - y_pred, 0))
    return tf.reduce_mean(y_true * square_pred + (1 - y_true) * margin_square)

def train_siamese_model(model=None, base_model=None, initial_epoch=0, phase=None):
    global strategy  # Use the global MirroredStrategy from Step 1

    
    if model is None:
        siamese_model, base_network = create_siamese_network((*IMG_SIZE, 3))
    else:
        siamese_model = model
        base_network = base_model if base_model else siamese_model.get_layer('model')  # Extract base network

    # Compile model
    siamese_model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=5e-4, weight_decay=1e-4),
        loss=contrastive_loss,
        metrics=['accuracy']
    )

    siamese_callbacks = create_callbacks("siamese")
    history_phase1 = None
    if phase in [None, 1]:
        print("=== Phase 1: Training Siamese Model (Frozen Base) ===")
        history_phase1 = siamese_model.fit(
            verification_dataset,
            validation_data=verification_dataset,  # Replace with actual validation dataset if available
            epochs=EPOCHS,
            initial_epoch=initial_epoch,
            verbose=1,
            callbacks=siamese_callbacks
        )

    # Phase 2: Fine-tuning
    for layer in base_network.layers:
        layer.trainable = True
        print(f"Unfroze layer: {layer.name}")

    # Recompile with lower learning rate
    
    siamese_model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
        loss=contrastive_loss,
        metrics=['accuracy']
    )

    
    history_phase2 = siamese_model.fit(
        verification_dataset,
        validation_data=verification_dataset,  # Replace with actual validation dataset if available
        epochs=EPOCHS,
        initial_epoch=initial_epoch if phase == 1 else EPOCHS,
        verbose=1,
        callbacks=create_callbacks("siamese_finetuned")
    )

    print("Saving model")
    siamese_model.save("custom_model_siamese.keras")
    df = pd.DataFrame(history_phase2.history)
    df.to_csv("custom_model_siamese_loss.csv")

    return siamese_model, base_network, history_phase1, history_phase2

In [10]:
def evaluate_model(siamese_model, verification_dataset):
    predictions = []
    true_labels = []

    for (img1, img2), label in verification_dataset:
        pred = siamese_model.predict([img1, img2], verbose=0)
        predictions.extend(pred.flatten())
        true_labels.extend(label.numpy())

    fpr, tpr, _ = roc_curve(true_labels, predictions)
    roc_auc = auc(fpr, tpr)

    return fpr, tpr, roc_auc

In [11]:
def main():
    # Load or create classification model
    classification_model, hist1, hist2 = train_classification_model(
        pretrained_model_path="/kaggle/input/verfication_model/keras/default/6/best_classification_finetuned_model_20250714_060424 (1).keras",
        initial_epoch=109,
        phase=2
    )

    # Train Siamese model
    siamese_model, base_network, siamese_hist1, siamese_hist2 = train_siamese_model(
        initial_epoch=0,
        phase=None
    )

    # Evaluate Siamese model
    verification_dataset = create_verification_generator(data_dir, pairs_file)
    fpr, tpr, roc_auc = evaluate_model(siamese_model, verification_dataset)
    print(f"ROC AUC: {roc_auc}")

if __name__ == "__main__":
    main()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/efficientnet_v2/efficientnetv2-b0_notop.h5
[1m24274472/24274472[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step
Unfroze base model: efficientnetv2-b0


  self._warn_if_super_not_called()


Epoch 110/150


I0000 00:00:1752547502.141103      87 service.cc:148] XLA service 0x7be344001ee0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1752547502.141895      87 service.cc:156]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1752547507.871241      87 cuda_dnn.cc:529] Loaded cuDNN version 90300
E0000 00:00:1752547517.584517      87 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1752547517.774593      87 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1752547518.318083      87 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1752547518.5317

[1m   1/2974[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m83:54:48[0m 102s/step - accuracy: 0.8359 - loss: 0.5068

I0000 00:00:1752547539.855700      87 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m1453/2974[0m [32m━━━━━━━━━[0m[37m━━━━━━━━━━━[0m [1m18:30[0m 730ms/step - accuracy: 0.8170 - loss: 0.7004

E0000 00:00:1752548613.229134      88 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1752548613.414465      88 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1752548613.913512      88 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1752548614.122906      88 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.


[1m2974/2974[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 783ms/step - accuracy: 0.8158 - loss: 0.7058
Epoch 110: val_accuracy improved from -inf to 0.73563, saving model to best_classification_finetuned_model_20250715_024356.keras
[1m2974/2974[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2474s[0m 798ms/step - accuracy: 0.8158 - loss: 0.7058 - val_accuracy: 0.7356 - val_loss: 1.4987 - learning_rate: 1.0000e-04
Epoch 111/150
[1m2974/2974[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 598ms/step - accuracy: 0.8187 - loss: 0.6894
Epoch 111: val_accuracy did not improve from 0.73563
[1m2974/2974[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1796s[0m 604ms/step - accuracy: 0.8187 - loss: 0.6894 - val_accuracy: 0.7344 - val_loss: 1.5318 - learning_rate: 1.0000e-04
Epoch 112/150
[1m2974/2974[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 616ms/step - accuracy: 0.8223 - loss: 0.6775
Epoch 112: val_accuracy improved from 0.73563 to 0.74187, saving model to best_

KeyboardInterrupt: 