In [1]:
!pip install facenet_pytorch

Collecting facenet_pytorch
  Downloading facenet_pytorch-2.6.0-py3-none-any.whl.metadata (12 kB)
Collecting Pillow<10.3.0,>=10.2.0 (from facenet_pytorch)
  Downloading pillow-10.2.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
Collecting torch<2.3.0,>=2.2.0 (from facenet_pytorch)
  Downloading torch-2.2.2-cp310-cp310-manylinux1_x86_64.whl.metadata (26 kB)
Collecting torchvision<0.18.0,>=0.17.0 (from facenet_pytorch)
  Downloading torchvision-0.17.2-cp310-cp310-manylinux1_x86_64.whl.metadata (6.6 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch<2.3.0,>=2.2.0->facenet_pytorch)
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch<2.3.0,>=2.2.0->facenet_pytorch)
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch<2.3.0,>=2.2.0->facenet_pytorch)
  Downloading nvidia_cuda_

In [2]:
!pip install facenet-pytorch



In [4]:
!pip install --upgrade pillow


Collecting pillow
  Downloading pillow-11.1.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (9.1 kB)
Downloading pillow-11.1.0-cp310-cp310-manylinux_2_28_x86_64.whl (4.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.5/4.5 MB[0m [31m68.8 MB/s[0m eta [36m0:00:00[0mta [36m0:00:01[0m
[?25hInstalling collected packages: pillow
  Attempting uninstall: pillow
    Found existing installation: pillow 10.2.0
    Uninstalling pillow-10.2.0:
      Successfully uninstalled pillow-10.2.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
facenet-pytorch 2.6.0 requires Pillow<10.3.0,>=10.2.0, but you have pillow 11.1.0 which is incompatible.
mlxtend 0.23.3 requires scikit-learn>=1.3.1, but you have scikit-learn 1.2.2 which is incompatible.
plotnine 0.14.4 requires matplotlib>=3.8.0, but you have matplotlib 3.7.5 which is incompatible.[0m[31m

In [24]:
import os
import pandas as pd
import cv2
from pathlib import Path
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader,random_split
import torchvision.transforms as transforms
import torchvision.models as models
from facenet_pytorch import InceptionResnetV1
import torch.nn.functional as F
import matplotlib.pyplot as plt
from tqdm import tqdm
from PIL import Image
import numpy as np
from torch.optim.lr_scheduler import ReduceLROnPlateau, CosineAnnealingLR

In [6]:
class FaceTestDataset(Dataset):
    def __init__(self, csv_file, root_dir, transform=None):
        self.data = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform
        self.data.columns = self.data.columns.str.lower()
        assert 'image_path' in self.data.columns and 'gt' in self.data.columns

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_path = os.path.join(self.root_dir, self.data.iloc[idx]['image_path'])
        img = Image.open(img_path).convert('RGB')
        return (self.transform(img), self.data.iloc[idx]['image_path'])


In [7]:
class FaceReIDDataset(Dataset):
    def __init__(self, csv_file, root_dir, transform=None):
        self.data = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform
        self.data.columns = self.data.columns.str.lower()
        assert 'gt' in self.data.columns and 'image_path' in self.data.columns
        self.labels = [int(row['gt'].split('_')[1]) for _, row in self.data.iterrows()]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_path = os.path.join(self.root_dir, self.data.iloc[idx]['image_path'])
        img = Image.open(img_path).convert('RGB')
        return (self.transform(img) if self.transform else img, self.labels[idx])

In [8]:
#test Transform augmentation
vgg_transform = transforms.Compose([
    transforms.Resize((160, 160)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])
# train Transform augemtation
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(160, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(15),
    transforms.ColorJitter(0.2, 0.2, 0.2, 0.1),
    vgg_transform
])

In [9]:
# 3. Set dataset paths and create dataset/dataloader
# train paths
csv_path = "/kaggle/input/surveillance-for-retail-stores/face_identification/face_identification/trainset.csv"
root_dir = "/kaggle/input/surveillance-for-retail-stores/face_identification/face_identification"

#test paths
csv_path_test = "/kaggle/input/surveillance-for-retail-stores/face_identification/face_identification/eval_set.csv"
root_dir_test = "/kaggle/input/surveillance-for-retail-stores/face_identification/face_identification/test"

In [10]:
# full training dataset
train_dataset = FaceReIDDataset(csv_file=csv_path, root_dir=root_dir, transform=train_transforms)

In [11]:
# train and validation dataloaders
train_dataloader = DataLoader(train_dataset, batch_size = 32,shuffle = True,num_workers =2)

# test dataset and dataloader
test_dataset = FaceTestDataset(csv_file=csv_path_test, root_dir=root_dir_test, transform = vgg_transform)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [12]:
# 4. Modified ArcMarginProduct with device-aware buffers
class ArcMarginProduct(nn.Module):
    def __init__(self, in_features, out_features, s=30.0, m=0.50):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.s = s
        self.m = m
        
        self.weight = nn.Parameter(torch.Tensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)
        
        self.register_buffer('cos_m', torch.cos(torch.tensor(m)))
        self.register_buffer('sin_m', torch.sin(torch.tensor(m)))
        self.register_buffer('th', torch.cos(torch.pi - torch.tensor(m)))
        self.register_buffer('mm', torch.sin(torch.pi - torch.tensor(m)) * m)

    def forward(self, input, label):
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        sine = torch.sqrt(1.0 - torch.clamp(cosine**2, 1e-7, 1))
        phi = cosine * self.cos_m - sine * self.sin_m
        phi = torch.where(cosine > self.th, phi, cosine - self.mm)
        
        one_hot = torch.zeros_like(cosine)
        one_hot.scatter_(1, label.view(-1, 1), 1.0)
        return (one_hot * phi + (1.0 - one_hot) * cosine) * self.s



In [20]:
from facenet_pytorch import MTCNN, InceptionResnetV1

# Load InceptionResNetV1 with VGGFace2 weights
def get_pretrained_model(device):
    model = InceptionResnetV1(pretrained='vggface2', classify=False)
    
    # Manually load weights if needed (alternative approach)
    # state_dict = torch.hub.load_state_dict_from_url(
    #     'https://github.com/timesler/facenet-pytorch/releases/download/v2.2.9/20180402-114759-vggface2.pt',
    #     progress=True
    # )
    # model.load_state_dict(state_dict)
    
    return model.to(device)

In [21]:
class FaceReIDModel(nn.Module):
    def __init__(self, backbone, num_classes):
        super().__init__()
        self.backbone = backbone
        self.arcface = ArcMarginProduct(512, num_classes)  # 512 is InceptionResNetV1 embedding size
        
    def forward(self, x, labels=None):
        embeddings = F.normalize(self.backbone(x))  # Add normalization
        if labels is not None:
            return self.arcface(embeddings, labels)
        return embeddings

In [22]:
def train_model(model, train_loader, criterion, optimizer, scheduler, device, num_epochs=20):
    model.train()
    best_acc = 0.0
    
    for epoch in range(num_epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        
        for inputs, labels in tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}'):
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            optimizer.zero_grad()
            
            # Forward pass
            outputs = model(inputs, labels)
            loss = criterion(outputs, labels)
            
            # Backward pass
            loss.backward()
            optimizer.step()
            
            # Statistics
            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        
        scheduler.step()
        epoch_loss = running_loss / total
        epoch_acc = correct / total
        
        print(f'Epoch {epoch+1} - Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')
        
        # Save best model
        if epoch_acc > best_acc:
            best_acc = epoch_acc
            torch.save(model.state_dict(), 'best_model.pth')
    
    print(f'Training complete. Best Acc: {best_acc:.4f}')

In [25]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_classes = 125  # From 125 unique individuals

# Initialize model
backbone = get_pretrained_model(device)
model = FaceReIDModel(backbone, num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW([
    {'params': model.backbone.parameters(), 'lr': 5e-5, 'weight_decay': 1e-4},
    {'params': model.arcface.parameters(), 'lr': 1e-3, 'weight_decay': 1e-4}
])
scheduler = CosineAnnealingLR(optimizer, T_max=20, eta_min=1e-6)

# Start training
train_model(model, train_dataloader, criterion, optimizer, scheduler, device, num_epochs=20)

Epoch 1/20: 100%|██████████| 427/427 [00:46<00:00,  9.13it/s]


Epoch 1 - Loss: 10.3475 Acc: 0.1467


Epoch 2/20: 100%|██████████| 427/427 [00:45<00:00,  9.36it/s]


Epoch 2 - Loss: 3.3000 Acc: 0.5207


Epoch 3/20: 100%|██████████| 427/427 [00:44<00:00,  9.51it/s]


Epoch 3 - Loss: 1.7422 Acc: 0.7094


Epoch 4/20: 100%|██████████| 427/427 [00:45<00:00,  9.34it/s]


Epoch 4 - Loss: 1.1107 Acc: 0.7938


Epoch 5/20: 100%|██████████| 427/427 [00:45<00:00,  9.29it/s]


Epoch 5 - Loss: 0.8065 Acc: 0.8401


Epoch 6/20: 100%|██████████| 427/427 [00:45<00:00,  9.35it/s]


Epoch 6 - Loss: 0.5640 Acc: 0.8828


Epoch 7/20: 100%|██████████| 427/427 [00:45<00:00,  9.43it/s]


Epoch 7 - Loss: 0.4682 Acc: 0.8995


Epoch 8/20: 100%|██████████| 427/427 [00:44<00:00,  9.66it/s]


Epoch 8 - Loss: 0.3682 Acc: 0.9187


Epoch 9/20: 100%|██████████| 427/427 [00:44<00:00,  9.64it/s]


Epoch 9 - Loss: 0.2591 Acc: 0.9438


Epoch 10/20: 100%|██████████| 427/427 [00:45<00:00,  9.44it/s]


Epoch 10 - Loss: 0.1927 Acc: 0.9536


Epoch 11/20: 100%|██████████| 427/427 [00:45<00:00,  9.47it/s]


Epoch 11 - Loss: 0.1752 Acc: 0.9605


Epoch 12/20: 100%|██████████| 427/427 [00:46<00:00,  9.20it/s]


Epoch 12 - Loss: 0.1400 Acc: 0.9676


Epoch 13/20: 100%|██████████| 427/427 [00:45<00:00,  9.42it/s]


Epoch 13 - Loss: 0.1120 Acc: 0.9748


Epoch 14/20: 100%|██████████| 427/427 [00:46<00:00,  9.25it/s]


Epoch 14 - Loss: 0.0711 Acc: 0.9820


Epoch 15/20: 100%|██████████| 427/427 [00:45<00:00,  9.32it/s]


Epoch 15 - Loss: 0.0607 Acc: 0.9840


Epoch 16/20: 100%|██████████| 427/427 [00:45<00:00,  9.42it/s]


Epoch 16 - Loss: 0.0473 Acc: 0.9889


Epoch 17/20: 100%|██████████| 427/427 [00:44<00:00,  9.52it/s]


Epoch 17 - Loss: 0.0326 Acc: 0.9936


Epoch 18/20: 100%|██████████| 427/427 [00:45<00:00,  9.44it/s]


Epoch 18 - Loss: 0.0444 Acc: 0.9883


Epoch 19/20: 100%|██████████| 427/427 [00:45<00:00,  9.44it/s]


Epoch 19 - Loss: 0.0282 Acc: 0.9928


Epoch 20/20: 100%|██████████| 427/427 [00:44<00:00,  9.50it/s]

Epoch 20 - Loss: 0.0257 Acc: 0.9934
Training complete. Best Acc: 0.9936





In [26]:
from tqdm import tqdm
model.eval()
gallery_embeddings = {}  # key: label (int), value: list of embeddings

with torch.inference_mode():
    for images, labels in tqdm(train_dataloader, desc="Computing gallery embeddings"):
        images = images.to(device)
        labels = labels.to(device)
        # Forward pass through the backbone only for embeddings
        embeddings = model(images)
        for emb, lbl in zip(embeddings, labels):
            lbl = int(lbl.item())
            if lbl not in gallery_embeddings:
                gallery_embeddings[lbl] = []
            gallery_embeddings[lbl].append(emb.cpu().numpy())

Computing gallery embeddings: 100%|██████████| 427/427 [00:41<00:00, 10.33it/s]


In [27]:
# Average embeddings for each identity, with a check for dimensionality
for lbl in gallery_embeddings:
    emb_list = gallery_embeddings[lbl]
    # Convert the list to a NumPy array
    arr = np.array(emb_list)
    print(f"Label {lbl}: array shape before averaging: {arr.shape}")
    if arr.size == 0:
        # Handle empty list (this shouldn't happen normally)
        print(f"Warning: No embeddings found for label {lbl}.")
        gallery_embeddings[lbl] = np.zeros(512)  # assuming embedding size is 512
    elif arr.ndim == 0:
        # arr is a scalar, convert to a 1D array
        gallery_embeddings[lbl] = np.array([arr])
    elif arr.ndim == 1:
        # Only one embedding, so no averaging needed
        gallery_embeddings[lbl] = arr
    else:
        # Multiple embeddings: average them along axis 0
        gallery_embeddings[lbl] = np.mean(arr, axis=0)


# Convert gallery embeddings to a tensor matrix for similarity computation
gallery_labels = list(gallery_embeddings.keys())
gallery_matrix = np.array([gallery_embeddings[lbl] for lbl in gallery_labels])
gallery_matrix_tensor = torch.tensor(gallery_matrix, device=device)

# 5. Define a cosine similarity function for a single embedding
def cosine_similarity(emb, gallery_tensor):
    # emb: shape [embedding_size], gallery_tensor: shape [N, embedding_size]
    emb = emb.unsqueeze(0)  # shape [1, embedding_size]
    return F.cosine_similarity(emb, gallery_tensor)

# 6. Testing loop: Compute embeddings for test images and find best matching identity
threshold = 0.8
results = []

model.eval()
with torch.inference_mode():
    for images, img_paths in tqdm(test_dataloader, desc="Processing test images"):
        images = images.to(device)
        # Compute embeddings using the backbone only
        embeddings = model(images)
        embeddings = F.normalize(embeddings)
        for emb, img_path in zip(embeddings, img_paths):
            sim = cosine_similarity(emb, gallery_matrix_tensor)
            max_sim, max_idx = torch.max(sim, dim=0)
            if max_sim.item() < threshold:
                pred_label = "doesn't_exist"
            else:
                pred_label = f"person_{gallery_labels[max_idx.item()]}"
            results.append({
                "image_path": img_path,
                "pred": pred_label
            })

Label 45: array shape before averaging: (96, 512)
Label 117: array shape before averaging: (245, 512)
Label 26: array shape before averaging: (26, 512)
Label 72: array shape before averaging: (21, 512)
Label 56: array shape before averaging: (41, 512)
Label 111: array shape before averaging: (18, 512)
Label 107: array shape before averaging: (191, 512)
Label 5: array shape before averaging: (20, 512)
Label 82: array shape before averaging: (53, 512)
Label 46: array shape before averaging: (67, 512)
Label 110: array shape before averaging: (79, 512)
Label 41: array shape before averaging: (51, 512)
Label 23: array shape before averaging: (71, 512)
Label 69: array shape before averaging: (42, 512)
Label 15: array shape before averaging: (86, 512)
Label 74: array shape before averaging: (49, 512)
Label 35: array shape before averaging: (91, 512)
Label 100: array shape before averaging: (77, 512)
Label 67: array shape before averaging: (85, 512)
Label 58: array shape before averaging: (30,

Processing test images: 100%|██████████| 296/296 [00:50<00:00,  5.89it/s]


In [28]:
def create_submission_file(results, filename="submission_file_lastt.csv"):
    """
    Creates a submission CSV file for the face re-identification task.
    
    Args:
        results (list): A list of dictionaries, each with keys "pred" and "image_path".
        filename (str): Name of the CSV file to be saved.
    
    The submission file will have the following columns:
        - ID: Unique identifier for each entry.
        - frame: Set to -1 (as per the task requirement).
        - objects: A stringified dictionary with keys 'gt' (the prediction)
                   and 'image' (the image path).
        - objective: Set to "face_reid".
    """
    submission = []
    for idx, res in enumerate(results):
        submission.append({
            "ID": idx,
            "frame": -1,
            "objects": str({'gt': res["pred"], 'image':'test_set/' + res["image_path"]}),
            "objective": "face_reid"
        })
    
    submission_df = pd.DataFrame(submission)
    submission_df.to_csv(filename, index=False)
    print(f"Submission file saved at {filename}")
    return submission_df

In [29]:
submission_df = create_submission_file(results)

Submission file saved at submission_file_lastt.csv
