# Pretraining the base embedding model 

This notebook pretrains the base embedding model on CASIA-WebFace. <br>
The pretrained model will then be used for fine-tuning on LFW.

In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf, keras
from models import get_backbone_model

E0000 00:00:1735057580.590995    9171 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1735057580.596187    9171 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
# Folder holding the CASIA-WebFace images used for pretraining.
casia_dir = os.path.join(
    'Data',
    'Casia Webface',
    'casia-webface-augmented',
)
# Mini-batch size, and the side length (in pixels) that images are resized to.
batch_size, image_size = 192, 128

In [3]:
# Build the training and held-out splits in a single call (85% / 15%).
# Labels are integer class indices inferred from the sub-folder names, and the
# fixed seed keeps the shuffle/split reproducible between runs.
training_data, test_data = keras.utils.image_dataset_from_directory(
    directory=casia_dir,
    labels='inferred',
    label_mode='int',
    color_mode='rgb',
    batch_size=batch_size,
    image_size=(image_size, image_size),
    shuffle=True,
    seed=100,
    validation_split=0.15,
    subset='both',
)

# Let tf.data prepare upcoming batches in the background for both splits.
training_data, test_data = [
    split.prefetch(tf.data.AUTOTUNE) for split in (training_data, test_data)
]

Found 2453115 files belonging to 10572 classes.
Using 2085148 files for training.
Using 367967 files for validation.


I0000 00:00:1735057713.186234    9171 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 5867 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3060 Ti, pci bus id: 0000:09:00.0, compute capability: 8.6


Get the base embedding model:

In [4]:
# Build the embedding backbone, passing the configured image size.
backbone_model = get_backbone_model(image_size=image_size)

In [5]:
# Print a summary of the backbone model.
backbone_model.summary()

Add classification head:

In [6]:
# Stack a softmax classifier on top of the backbone: light dropout followed by
# one output unit per class (the dataset directory contains 10572 classes).
classification_model = keras.Sequential(
    layers=[
        backbone_model,
        keras.layers.Dropout(rate=0.1),
        keras.layers.Dense(10572, activation='softmax'),
    ]
)

In [7]:
# Print a summary of the backbone + classification head model.
classification_model.summary()

Compile the model and train:

In [8]:
# Optimizer for the classification pretraining stage.
optimizer = keras.optimizers.Adam(
    learning_rate = 0.00001,
)

# Integer labels -> sparse categorical loss/metrics.
classification_model.compile(
    optimizer = optimizer,
    loss = keras.losses.SparseCategoricalCrossentropy(),
    metrics = [keras.metrics.SparseCategoricalAccuracy(), keras.metrics.SparseTopKCategoricalAccuracy()]
)
weights_path = os.path.join('Data', 'Models', 'casia classification model', 'model.weights.h5')

# No validation data is passed to fit(), so every callback watches the training loss.
early_stop = keras.callbacks.EarlyStopping(monitor = 'loss', patience = 10, verbose = 1)
learning_rate_schedule = keras.callbacks.ReduceLROnPlateau(monitor = 'loss', factor = 0.2, patience = 3)

checkpoint = keras.callbacks.ModelCheckpoint(
    filepath = weights_path,
    verbose = 1,
    save_weights_only = True,
    monitor = 'loss',
    save_best_only = True,
    save_freq = 1000 # save every 1000 batches
)

# Resume from an earlier checkpoint when one exists. On a fresh run there is no
# weights file yet, and an unconditional load would abort "Restart & Run All".
if os.path.exists(weights_path):
    classification_model.load_weights(weights_path)
else:
    print(f'No checkpoint found at {weights_path!r}; starting from the current weights.')

  saveable.load_own_variables(weights_store.get(inner_path))


In [None]:
# `training_data` is an already-batched tf.data.Dataset (its batch size is fixed
# when the dataset is created), so `batch_size` must not be passed to fit().
classification_model.fit(
    training_data,
    epochs = 150,
    callbacks = [early_stop, learning_rate_schedule, checkpoint]
)

In [9]:
# `test_data` is an already-batched tf.data.Dataset, so `batch_size` is not passed.
# Returns [loss, sparse_categorical_accuracy, sparse_top_k_categorical_accuracy].
classification_model.evaluate(test_data)

I0000 00:00:1735057721.282611    9311 service.cc:148] XLA service 0x78c568042980 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1735057721.282641    9311 service.cc:156]   StreamExecutor device (0): NVIDIA GeForce RTX 3060 Ti, Compute Capability 8.6
I0000 00:00:1735057721.743790    9311 cuda_dnn.cc:529] Loaded cuDNN version 90300
I0000 00:00:1735057733.048691    9311 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m1917/1917[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m175s[0m 84ms/step - loss: 0.2859 - sparse_categorical_accuracy: 0.9481 - sparse_top_k_categorical_accuracy: 0.9747


[0.2874649465084076, 0.948041558265686, 0.9746797680854797]

In [11]:
# Remove the classification head and save the weights of the backbone.
# layers[0] of the Sequential model is the backbone, so a model spanning its
# input and output leaves out the Dropout and Dense layers added on top.
backbone = keras.models.Model(
    inputs = classification_model.layers[0].input,
    outputs = classification_model.layers[0].output
)

backbone_weights_path = os.path.join('Data', 'Models', 'backbone after classification', 'model.weights.h5')
# Create the target folder first so that saving also works on a fresh checkout.
os.makedirs(os.path.dirname(backbone_weights_path), exist_ok = True)
backbone.save_weights(backbone_weights_path)

# Pretrain the siamese network on casia webface

In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '4'
import keras, numpy as np
from models import get_siamese_model, contrastive_loss
from data_loaders import DataLoaderContrastive, get_dataset_contrastive_with_prefetching

image_size = 128

E0000 00:00:1735377390.855663    6090 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1735377390.889165    6090 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Build the siamese network and load the classification weights from the first pretraining stage

In [2]:
# Weights file produced by the classification pretraining stage.
backbone_weights_path = os.path.join(
    'Data', 'Models', 'backbone after classification', 'model.weights.h5'
)
# Build the siamese model from that weights file, with augmentation enabled.
siamese_model = get_siamese_model(
    backbone_weights_path,
    with_augmentation=True,
)

I0000 00:00:1735377396.282581    6090 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 5833 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3060 Ti, pci bus id: 0000:09:00.0, compute capability: 8.6


In [3]:
# Print a summary of the siamese model.
siamese_model.summary()

Set up the training on casia webface

In [4]:
# Optimizer hyper-parameters for the contrastive pretraining stage.
learning_rate, weight_decay = 0.00001, 0.0001

# Checkpoint callback: writes weights only, and overwrites the same file on
# every save because `save_best_only` is disabled.
checkpoint_path = os.path.join(
    'Data', 'Models', 'pretrained contrastive loss model', 'model.weights.h5'
)
checkpoint = keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_loss',
    save_best_only=False,
    save_weights_only=True,
    verbose=1,
)

# Train with the project's contrastive loss and Adam with weight decay.
siamese_model.compile(
    optimizer=keras.optimizers.Adam(
        learning_rate=learning_rate,
        weight_decay=weight_decay,
    ),
    loss=contrastive_loss(),
    metrics=['accuracy'],
)

In [5]:
batch_size = 32
positive_ratio = 0.2

# Both loaders are configured identically. The image size reuses the notebook's
# `image_size` constant instead of repeating the literal 128.
loader_options = dict(
    dataset_root_path = os.path.join('Data', 'Casia Webface', 'casia-webface'),
    batch_size = batch_size,
    image_size = (image_size, image_size),
    positive_ratio = positive_ratio
)

train_loader = DataLoaderContrastive(**loader_options)

# NOTE(review): validation reads from the same dataset root as training, so it is
# not a held-out split unless DataLoaderContrastive handles that internally — confirm.
validation_loader = DataLoaderContrastive(**loader_options)
validation_loader.batches_num //= 4 # use a quarter of the batches for validation

Load checkpoint weights

In [6]:
# Resume from the contrastive-stage checkpoint when one exists. On a fresh run
# the file is absent, and an unconditional load would abort "Restart & Run All".
if os.path.exists(checkpoint_path):
    siamese_model.load_weights(checkpoint_path)
else:
    print(f'No checkpoint found at {checkpoint_path!r}; keeping the current weights.')

  saveable.load_own_variables(weights_store.get(inner_path))


In [None]:
# The loaders are given their batch size at construction and are passed as `x`
# without targets, i.e. they produce batches themselves; `batch_size` is
# therefore not passed to fit().
history = siamese_model.fit(
    x = train_loader,
    validation_data = validation_loader,
    epochs = 10000,
    callbacks = [checkpoint]
)