In [1]:
import os
import logging
# Suppress warnings
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
logging.getLogger('tensorflow').setLevel(logging.ERROR)

# set backend
os.environ['KERAS_BACKEND'] = 'torch'
import keras
print("Backend after setting:", keras.config.backend())

import tensorflow as tf
import torch

# Check GPU visibility
print("TensorFlow GPUs:", tf.config.list_physical_devices('GPU'))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("PyTorch Device:", device)

Backend after setting: torch
TensorFlow GPUs: []
PyTorch Device: cuda


I0000 00:00:1741046505.006170  724454 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1741046505.007766  724454 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1741046505.048424  724454 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1741046505.265894  724454 cuda_executor.cc:1015] successful NUMA node read from SysFS ha

In [2]:
from keras.models import Sequential
from keras import regularizers
from keras.layers import (Input, Conv2D, BatchNormalization, ReLU, MaxPooling2D, 
                          Flatten, Dense, Dropout, Lambda)
from keras.initializers import HeNormal
import keras.ops as K

def get_model(hidden_units, output_units, input_shape, rate, l2_coeff=1e-5):
    """
    Creates a face verification model that outputs normalized embeddings.
    """

    model = Sequential([Input(shape=input_shape)])

    # --- Convolutional blocks / Feature extraction backbone ---

    # note we use he kaiming initialization for the weights
    model.add(Conv2D(32, (3, 3), padding='same', kernel_initializer=HeNormal(),
                     kernel_regularizer=regularizers.l2(l2_coeff)))
    model.add(BatchNormalization())
    model.add(ReLU())
    model.add(MaxPooling2D((2, 2)))

    # 2nd block
    model.add(Conv2D(64, (3, 3), padding='same', kernel_initializer=HeNormal(),
                     kernel_regularizer=regularizers.l2(l2_coeff)))
    model.add(BatchNormalization())
    model.add(ReLU())
    model.add(MaxPooling2D((2, 2)))

    # 3rd block
    model.add(Conv2D(128, (3, 3), padding='same', kernel_initializer=HeNormal(),
                     kernel_regularizer=regularizers.l2(l2_coeff)))
    model.add(BatchNormalization())
    model.add(ReLU())
    model.add(MaxPooling2D((2, 2)))

    model.add(Flatten())

    # --- Fully connected layers ---
    for units in hidden_units:
        model.add(Dense(units, kernel_initializer=HeNormal(),
                        kernel_regularizer=regularizers.l2(l2_coeff)))
        model.add(BatchNormalization())
        model.add(ReLU())
        model.add(Dropout(rate))

    # --- Output layer + normalization ---
    model.add(Dense(output_units, kernel_initializer=HeNormal()))
    model.add(Lambda(lambda x: x / K.norm(x, axis=1, keepdims=True)))

    return model

model = get_model(
    hidden_units=[1024, 128],
    output_units=128,
    input_shape=(112, 112, 3),
    rate=0.5
)
model.summary()

In [3]:
# import os
# import numpy as np
# from tensorflow.keras.utils import Sequence
# from PIL import Image

# class KerasFaceDataset(Sequence):
#     """
#     Keras-compatible dataset that:
#       - Takes a dataset path and extracts image samples with labels
#       - Selects up to max_images_per_identity from each identity
#       - Allows dataset splitting using indexes (train/validation sets)
#       - Returns batches of (images, labels) in (N, 112, 112, 3) format
#     """
#     def __init__(self, dataset_path, identities, max_images_per_identity=10,
#                  batch_size=1024, shuffle=True, sample_indexes=None):
#         self.dataset_path = dataset_path
#         self.identities = identities
#         self.max_images_per_identity = max_images_per_identity
#         self.batch_size = batch_size
#         self.shuffle = shuffle
        
#         self.image_paths = []
#         self.labels = []
        
#         # Collect image paths and their identity labels
#         for idx, identity in enumerate(identities):
#             identity_folder = os.path.join(dataset_path, identity)
#             image_files = sorted(os.listdir(identity_folder))  # Ensure consistency
#             selected_images = image_files[:max_images_per_identity]
#             for img_name in selected_images:
#                 self.image_paths.append(os.path.join(identity_folder, img_name))
#                 self.labels.append(idx)
        
#         self.indexes = np.arange(len(self.image_paths))
        
#         # If sample indexes are provided, filter dataset
#         if sample_indexes is not None:
#             self.image_paths = [self.image_paths[i] for i in sample_indexes]
#             self.labels = [self.labels[i] for i in sample_indexes]
#             self.indexes = np.arange(len(self.image_paths))

#         self.on_epoch_end()  # Shuffle at the start if needed

#     def __len__(self):
#         """Number of batches per epoch."""
#         return int(np.ceil(len(self.image_paths) / float(self.batch_size)))

#     def on_epoch_end(self):
#         """Shuffle indexes after each epoch if needed."""
#         if self.shuffle:
#             np.random.shuffle(self.indexes)

#     def __getitem__(self, index):
#         """Generates one batch of data."""
#         batch_indexes = self.indexes[index * self.batch_size:
#                                      (index + 1) * self.batch_size]
        
#         batch_paths = [self.image_paths[i] for i in batch_indexes]
#         batch_labels = [self.labels[i] for i in batch_indexes]

#         images = []
#         for path in batch_paths:
#             img = Image.open(path).convert("RGB")
#             img = img.resize((112, 112), resample=Image.BILINEAR)
#             img = np.array(img, dtype=np.float32) / 255.0  # Scale to [0, 1]
#             images.append(img)

#         images = np.stack(images, axis=0)  # Convert to (N, 112, 112, 3)
#         labels = np.array(batch_labels, dtype=np.int32)
        
#         return images, labels


In [None]:
# from sklearn.model_selection import train_test_split

# # Load dataset identities
# dataset_path = "data/casia-webface"
# identities = [d for d in os.listdir(dataset_path) if os.path.isdir(os.path.join(dataset_path, d))]
# print("Number of identities in dataset:", len(identities))

# # Create a full dataset instance
# full_dataset = KerasFaceDataset(dataset_path, identities, max_images_per_identity=10, batch_size=1024, shuffle=True)
# print("Number of batches in full dataset:", len(full_dataset))

# # Split sample indexes randomly (instead of by identity)
# indices = np.arange(len(full_dataset.image_paths))
# train_indices, val_indices = train_test_split(indices, test_size=0.2, random_state=42)

# # Create train and validation datasets
# train_dataset = KerasFaceDataset(dataset_path, identities,
#                                  max_images_per_identity=10, 
#                                  batch_size=1024, sample_indexes=train_indices)
# val_dataset = KerasFaceDataset(dataset_path, identities, 
#                                max_images_per_identity=10, batch_size=1024, 
#                                sample_indexes=val_indices)


Number of identities in dataset: 8000
Number of batches in full dataset: 79


total_data_size = ideneitities * max_per_identity = 8000 * 10

each batch need 10 faces per identity, so each batch need 8000 pics

In [None]:
# print(f"Train dataset size: {len(train_dataset)}")
# print(f"Validation dataset size: {len(val_dataset)}")
# # check shape
# for images, labels in train_dataset:
#     print(f"Images shape: {images.shape}, Labels shape: {labels.shape}")
#     break


Train dataset size: 63
Validation dataset size: 16
Images shape: (1024, 112, 112, 3), Labels shape: (1024,)


In [6]:
import os
import random
import numpy as np
from PIL import Image

import torch
from torch.utils.data import Dataset, DataLoader, Sampler

class PytorchFaceDataset(Dataset):
    """
    A single unified class that:
      - Loads up to `max_images_per_identity` images from each identity folder.
      - Optionally uses `sample_indexes` for train/val slicing.
    """
    def __init__(
        self, dataset_path, identities,
        max_images_per_identity=10,
        sample_indexes=None,
        classes_per_batch=None,
        samples_per_class=None
    ):
        self.dataset_path = dataset_path
        self.identities = identities
        self.max_images_per_identity = max_images_per_identity
        self.sample_indexes = sample_indexes
        self.classes_per_batch = classes_per_batch
        self.samples_per_class = samples_per_class

        self.image_paths = []
        self.labels = []  # We'll group by label = idx in `identities`
        
        # Gather all images & labels
        for idx, identity in enumerate(identities):
            identity_folder = os.path.join(dataset_path, identity)
            if not os.path.isdir(identity_folder):
                continue  # skip if folder doesn't exist

            image_files = sorted(os.listdir(identity_folder))
            # Take up to max_images_per_identity images
            selected_images = image_files[:max_images_per_identity]
            for img_name in selected_images:
                self.image_paths.append(os.path.join(identity_folder, img_name))
                self.labels.append(idx)

        # sample_indexes is train_indices or val_indices
        if self.sample_indexes is not None:
            self.image_paths = [self.image_paths[i] for i in self.sample_indexes]
            self.labels = [self.labels[i] for i in self.sample_indexes]

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, index):
        img_path = self.image_paths[index]
        label = self.labels[index]

        # Load image, convert to RGB, resize to (112, 112)
        img = Image.open(img_path).convert("RGB")
        img = img.resize((112, 112), resample=Image.BILINEAR)
        # Normalize to [0, 1]
        img = np.array(img, dtype=np.float32) / 255.0  # shape: (112, 112, 3)

        img_tensor = torch.from_numpy(img)  # currently (112, 112, 3)

        return img_tensor, label

    def get_dataloader(self, batch_size=128, shuffle=True, num_workers=4):
        # If we have custom-batch parameters set, build the custom sampler
        if self.classes_per_batch is not None and self.samples_per_class is not None:
            return DataLoader(
                self,
                batch_sampler=self._BatchSampler(
                    self.labels,
                    self.classes_per_batch,
                    self.samples_per_class
                ),
                num_workers=num_workers
            )
        else:
            return DataLoader(
                self,
                batch_size=batch_size,
                shuffle=shuffle,
                num_workers=num_workers
            )

    class _BatchSampler(Sampler):
        """
        Ensures each batch contains `classes_per_batch` classes, each has
        `samples_per_class` samples or fewer.
        """
        def __init__(self, labels, classes_per_batch, samples_per_class):
            self.labels = labels
            self.classes_per_batch = classes_per_batch
            self.samples_per_class = samples_per_class

            # Group indices by class
            self.class_to_indices = {}
            for idx, label in enumerate(labels):
                self.class_to_indices.setdefault(label, []).append(idx)

            # Keep list of all classes for shuffling each epoch
            self.all_classes = list(self.class_to_indices.keys())

        def __iter__(self):
            '''Defines how the batch indices are generated.'''
            random.shuffle(self.all_classes) # Shuffle list of all labels each epoch

            # We'll chunk the shuffled class list in groups of 'classes_per_batch'

            # loop over shuffled classes in chunks of 'classes_per_batch'
            # e.g. 8000 classes, 10 class per epoch, so 800 iterations
            for start in range(0, len(self.all_classes), self.classes_per_batch):
                chunk_classes = self.all_classes[start:start + self.classes_per_batch]
                
                batch_indices = []
                for cls in chunk_classes: # collect sample indices for each class
                    idx_list = self.class_to_indices[cls] # map label to indices

                    # if a class has enough samples, sample 'samples_per_class' indices
                    # otherwise, take all indices
                    if len(idx_list) >= self.samples_per_class:
                        chosen = random.sample(idx_list, self.samples_per_class)
                    else:
                        chosen = idx_list  # class is smaller than desired
                    batch_indices.extend(chosen)

                yield batch_indices # move to dataloader

        def __len__(self):
            ''' computes how many batches are needed to cover all classes '''
            # e.g. 8000 class, 10 class per batch, so (8010-1) // 10 = 800
            return (len(self.all_classes) + self.classes_per_batch - 1) // self.classes_per_batch


Note we have 8000 identities, and choose 10 images from each ideneity, so total size = 80k

We have 800 batches, and within each batch, 10 classes and $\leq$ 10 samples from each class

In [7]:
indices = np.arange(80000)
train_indices, val_indices = train_test_split(indices, test_size=0.2, random_state=42)
train_torch_dataset = PytorchFaceDataset(dataset_path, identities, 
                                         max_images_per_identity=10, 
                                         sample_indexes=train_indices,
                                         classes_per_batch=10,   # each batch will pick 10 classes
                                        samples_per_class=10) # each class has 10 samples
train_loader = train_torch_dataset.get_dataloader(num_workers=4) # batch size = 10*10=100
# val_torch_dataset = PytorchFaceDataset(dataset_path, identities,
#                                        max_images_per_identity=10, 
#                                        sample_indexes=val_indices)
val_torch_dataset = PytorchFaceDataset(dataset_path, identities,
                                       max_images_per_identity=10, 
                                       sample_indexes=val_indices,
                                       classes_per_batch=10,   # each batch will pick 10 classes
                                       samples_per_class=10) # each class has 10 samples
# val_loader = val_torch_dataset.get_dataloader(batch_size=128, shuffle=False)
val_loader = val_torch_dataset.get_dataloader(num_workers=4) # batch size = 10*10=100

# Check shape
for images, labels in train_loader:
    print(f"Images shape: {images.shape}, Labels shape: {labels.shape}")
    print("Unique labels in this batch:", len(set(labels.tolist())))
    break


  self.pid = os.fork()
  self.pid = os.fork()


Images shape: torch.Size([83, 112, 112, 3]), Labels shape: torch.Size([83])
Unique labels in this batch: 10


In [8]:
import torch
import torch.nn.functional as F

def circle_loss_torch(anchor_embeddings, positive_embeddings, negative_embeddings, m=0.25, gamma=64.0):
    # -- Step 1: Compute scaled cosine similarities in [0, 1].
    s_p = ((anchor_embeddings * positive_embeddings).sum(dim=1) + 1.0) / 2.0
    s_n = ((anchor_embeddings * negative_embeddings).sum(dim=1) + 1.0) / 2.0

    alpha_p = 1.0 + m - s_p
    alpha_n = s_n + m
    delta_p = 1.0 - m
    delta_n = m

    # We'll compute this in a vectorized way and then average across the batch.
    inside_term = gamma * (alpha_n * (s_n - delta_n) - alpha_p * (s_p - delta_p))
    losses = (1.0 / gamma) * torch.log1p(torch.exp(inside_term))  # log1p(x) = log(1 + x)
    
    return losses.mean()


The idea is to dynamically generate triplets during training

In [9]:
import torch
import numpy as np
from itertools import combinations

def get_triplets(model, batch_data, margin=0.2, validation=False):
    """Selects hard triplets from a batch for triplet loss."""
    images, labels = batch_data  # Unpack batch
    labels = labels.cpu().numpy()  # Convert labels to numpy array

    with torch.no_grad():
        embeddings = model(images)
    
    # Compute pairwise distances
    batch_size = embeddings.shape[0] # should be 1024
    distances = torch.cdist(embeddings, embeddings, p=2)  # Euclidean dist

    triplets = []
    
    for anchor_idx in range(batch_size):
        anchor_label = labels[anchor_idx]

        # Find positive indices
        positive_indices = np.where(labels == anchor_label)[0]
        positive_indices = positive_indices[positive_indices != anchor_idx]  # Exclude anchor

        # print(len(positive_indices), 'positive indices length')

        # Find negative indices
        negative_indices = np.where(labels != anchor_label)[0]
        
        # print(len(negative_indices), 'negative indices length')

        if len(positive_indices) == 0 or len(negative_indices) == 0:
            continue  # Skip if we can't form a triplet

        # Select the hardest positive (farthest within same class)
        hardest_positive_idx = max(positive_indices, key=lambda i: distances[anchor_idx, i].item())

        # semi-hard negative, need to have bigger distance than hardest positive
        semi_hard_negatives = [i for i in negative_indices if distances[anchor_idx, i] > distances[anchor_idx, hardest_positive_idx]]
        if len(semi_hard_negatives) == 0:
            continue
        else:
            # neg_inx = random.choice(semi_hard_negatives) # randomly choose one negative
            hardest_negative_idx = min(semi_hard_negatives, key=lambda i: distances[anchor_idx, i].item())

        # print("Positive distance:", distances[anchor_idx, hardest_positive_idx])
        # print("hardest Negative distance:", distances[anchor_idx, hardest_negative_idx])

        if validation:
            # Don't need margin condition
            neg_inx = random.choice(negative_indices)
            triplets.append((anchor_idx, hardest_positive_idx, neg_inx))
            return triplets
        
        # For training, need to ensure valid triplet (margin condition)
        if distances[anchor_idx, hardest_negative_idx] - distances[anchor_idx, hardest_positive_idx] > margin:
            triplets.append((anchor_idx, hardest_positive_idx, hardest_negative_idx))

    return triplets


In [10]:
for batch in train_loader:
    # print(batch[0].shape, print(batch[1].shape), print(len(batch)))
    print(batch[0].shape, 'batch 0 shape')
    print(batch[1].shape, 'batch 1 shape')
    print(len(batch), 'length of batch')
    break

torch.Size([75, 112, 112, 3]) batch 0 shape
torch.Size([75]) batch 1 shape
2 length of batch


In [11]:
for images, labels in train_loader:
    triplets = get_triplets(model, (images.to(device), labels.to(device)), margin=1e-5)
    print(f"Number of triplets: {len(triplets)}")
    print("Sample triplet:", triplets[0])
    anchor = triplets[0][0]
    print(images[anchor].shape) # anchor image shape
    break

Number of triplets: 73
Sample triplet: (0, 5, 70)
torch.Size([112, 112, 3])


In [12]:
def pt_train_step(mlp_model, loss_fn, opt, train_batch):
    """PyTorch training step for triplet loss."""
    mlp_model.zero_grad()

    (anchor, positive, negative) = train_batch

    anchor_embed = mlp_model(anchor)  # Shape: (batch_size, embedding_dim)
    positive_embed = mlp_model(positive)
    negative_embed = mlp_model(negative)

    loss = loss_fn(anchor_embed, positive_embed, negative_embed)

    loss.backward()

    grads = [param.grad for param in mlp_model.parameters()]

    return loss.item(), grads


In [13]:
def pt_valid_step(mlp_model, val_batch):
    """PyTorch validation step for triplet loss."""
    (anchor, positive, negative) = val_batch

    anchor_embed = mlp_model(anchor)  # Shape: (batch_size, embedding_dim)
    positive_embed = mlp_model(positive)
    negative_embed = mlp_model(negative)
    
    pos_dist = torch.norm(anchor_embed - positive_embed, dim=1)  # Distance A→P
    neg_dist = torch.norm(anchor_embed - negative_embed, dim=1)  # Distance A→N
    # check what percentage of data has anchor_embed closer to positive_embed than negative_embed


    correct = (pos_dist < neg_dist).float()  # 1 if correct, 0 if incorrect
    accuracy = correct.mean().item()
    return accuracy

In [14]:
import keras
loss_metric = keras.metrics.Mean()
accuracy_metric = keras.metrics.SparseCategoricalAccuracy()
val_accuracy_metric = keras.metrics.Mean() 
# val_loss_metric = keras.metrics.Mean()

In [15]:
def train_model_custom(mlp_model, loss_fn, opt, training_dataset, validation_dataset, train_step_fn, valid_step_fn, epochs):
    """
    Train the model and evaluate on a validation set.
    Returns lists of training and validation losses/accuracies.
    """
    # check if training is using GPU
    print("Training on GPU:", next(mlp_model.parameters()).is_cuda)

    epoch_losses = []
    epoch_acc = []
    val_epoch_losses = []
    val_epoch_acc = []

    for epoch in range(epochs):
        print(f"Epoch {epoch}/{epochs}")
        # Reset metrics for new epoch
        loss_metric.reset_state()
        val_accuracy_metric.reset_state()
        
        # Training loop
        for images, labels in training_dataset:
            triplets = get_triplets(model, (images.to(device), labels.to(device)), margin=1e-5) # triplet indices
            anchors = [triplet[0] for triplet in triplets] # anchor indices
            positives = [triplet[1] for triplet in triplets] # positive indices
            negatives = [triplet[2] for triplet in triplets] # negative indices

            triplet_data = (images[anchors], images[positives], images[negatives])
            loss, grads = train_step_fn(mlp_model, loss_fn, opt, train_batch=triplet_data)
            opt.apply_gradients(zip(grads, mlp_model.trainable_variables))
            
            loss_metric.update_state(loss)

        # Compute training loss and accuracy
        avg_epoch_loss = float(loss_metric.result().cpu().numpy())

        # Validation loop
        with torch.no_grad():  # Disable gradients for validation
            for images, labels in validation_dataset:
                triplets = get_triplets(model, (images.to(device), labels.to(device)), margin=1e-5, validation=True)
                anchors = [triplet[0] for triplet in triplets]
                positives = [triplet[1] for triplet in triplets]
                negatives = [triplet[2] for triplet in triplets]
                triplet_data = (images[anchors], images[positives], images[negatives])
                acc = valid_step_fn(mlp_model, val_batch=triplet_data)
                val_accuracy_metric.update_state(acc)

        # Compute validation loss and accuracy
        avg_val_acc = float(val_accuracy_metric.result().cpu().numpy())

        # Store epoch results
        epoch_losses.append(avg_epoch_loss)
        # val_epoch_losses.append(avg_val_loss)
        val_epoch_acc.append(avg_val_acc)

        # Print progress
        print(f"Epoch {epoch}: loss - {avg_epoch_loss:.4f}, ")

        # for param in model.parameters():
        #     print('grad:', param.grad)

        # save checkpoint
        # if epoch % 10 == 0:
        print(f"val_acc - {avg_val_acc:.4f}")

        mlp_model.save(f"checkpoint_epoch_{epoch}.keras")


        # Save additional metadata separately (like epoch & val accuracy)
        metadata = {
            'epoch': epoch,
            'val_accuracy': avg_val_acc
        }
        import json
        with open(f"checkpoint_metadata_epoch_{epoch}.json", "w") as f:
            json.dump(metadata, f)


    return epoch_losses


In [16]:
# optimizer = keras.optimizers.SGD()
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
optimizer = keras.optimizers.SGD(learning_rate=0.1)  # Try 0.1 or higher

epoch_losses = train_model_custom(model, loss_fn=circle_loss_torch, opt=optimizer,
                                            training_dataset=train_loader, 
                                            validation_dataset=val_loader, 
                                            train_step_fn=pt_train_step, 
                                            valid_step_fn=pt_valid_step,
                                            epochs=100)


Training on GPU: True
Epoch 0/100




Epoch 0: loss - 0.3992, 
val_acc - nan
Epoch 1/100
Epoch 1: loss - 0.3828, 
val_acc - 0.5014
Epoch 2/100
Epoch 2: loss - 0.3997, 
val_acc - 0.4651
Epoch 3/100
Epoch 3: loss - 0.3922, 
val_acc - 0.4986
Epoch 4/100
Epoch 4: loss - 0.3890, 
val_acc - 0.5363
Epoch 5/100
Epoch 5: loss - 0.3780, 
val_acc - 0.4707
Epoch 6/100
Epoch 6: loss - 0.3788, 
val_acc - 0.5265
Epoch 7/100
Epoch 7: loss - 0.3774, 
val_acc - 0.4609
Epoch 8/100
Epoch 8: loss - 0.3766, 
val_acc - 0.4902
Epoch 9/100
Epoch 9: loss - 0.3793, 
val_acc - 0.4986
Epoch 10/100
Epoch 10: loss - 0.3772, 
val_acc - 0.5000
Epoch 11/100
Epoch 11: loss - 0.3767, 
val_acc - 0.4972
Epoch 12/100
Epoch 12: loss - 0.3767, 
val_acc - 0.5000
Epoch 13/100
Epoch 13: loss - 0.3789, 
val_acc - 0.5182
Epoch 14/100
Epoch 14: loss - 0.3769, 
val_acc - 0.4916
Epoch 15/100
Epoch 15: loss - 0.3770, 
val_acc - 0.5056
Epoch 16/100
Epoch 16: loss - 0.3771, 
val_acc - 0.5098
Epoch 17/100
Epoch 17: loss - 0.3774, 
val_acc - 0.5098
Epoch 18/100
Epoch 18: loss

KeyboardInterrupt: 

In [None]:
pairs_file = "./data/lfw_test_pairs.txt"
lfw_folder = "./data/labeled-faces-in-the-wild"

In [17]:
import torch
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"
if torch.cuda.is_available():
    device = torch.device("cuda:0")
    num_devices  = torch.cuda.device_count()
    print(f"Number of GPUs: {num_devices}")
    for i in range(num_devices):
        print(f"Device {i}: {torch.cuda.get_device_name(i)}")
        total_mem = torch.cuda.get_device_properties(device).total_memory
        print(f"Device {i}: {total_mem/1024**3:.2f}GB")

Number of GPUs: 4
Device 0: NVIDIA RTX A6000
Device 0: 47.43GB
Device 1: NVIDIA RTX A6000
Device 1: 47.43GB
Device 2: NVIDIA RTX A6000
Device 2: 47.43GB
Device 3: NVIDIA RTX A6000
Device 3: 47.43GB
