In [1]:
import torch
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
from torch.optim import Adam
import pandas as pd
import os
from facenet_pytorch import MTCNN, InceptionResnetV1
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from PIL import Image
import numpy as np
import cv2

  from .autonotebook import tqdm as notebook_tqdm


In [12]:
class FaceDataset(Dataset):
    def __init__(self, label_file, img_dir, transform=None):
        self.labels = pd.read_csv(label_file)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, idx):
        df_idx = self.labels['cum_count'].searchsorted(idx, side='left')
        img_row = self.labels.iloc[df_idx, :]
        if df_idx > 0:
            img_idx = idx - self.labels.iloc[df_idx - 1]['cum_count']
        else:
            img_idx = idx
        img_idx += 1
        img_idx = str(img_idx)
        img_idx = '0' * (4 - len(img_idx)) + img_idx
        img_name = img_row['name'] + "_" + img_idx + ".jpg"
        img_path = os.path.join(self.img_dir, img_row['name'], img_name)
        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        label = img_row['name']
        return image, label
        
        

In [13]:
data_root = os.path.join("..", "data", "raw", "lfw")
label_file = os.path.join(data_root, "lfw_allnames.csv")
data_root = os.path.join(data_root, "lfw-deepfunneled")


In [4]:
datas = FaceDataset(label_file=label_file, img_dir=data_root)
print(len(datas))

5749


In [5]:
loader = DataLoader(datas, batch_size=32, shuffle=True, num_workers=2)

In [6]:
face_detector = MTCNN(image_size=160, margin=0, min_face_size=20, thresholds=[0.6, 0.7, 0.7], factor=0.709, post_process=True, device='cuda' if torch.cuda.is_available() else 'cpu')
resnet = InceptionResnetV1(pretrained='vggface2').eval().to('cuda' if torch.cuda.is_available() else 'cpu')



In [None]:
# Testing with multiple images - manual batching
sample_imgs = [datas.__getitem__(102)[0], datas.__getitem__(112)[0]]

# Convert each PIL image to numpy array
img_arrays = [np.array(img) for img in sample_imgs]

# Stack into batch
batch = np.stack(img_arrays, axis=0)
print(f"Batch shape: {batch.shape}")

# Process batch with MTCNN
results = face_detector(batch)
print(f"Number of results: {len(results)}, Type: {type(results)}")

# Filter out None values (images where no face was detected)
detected_faces = [face for face in results if face is not None]

if len(detected_faces) > 0:
    # Stack all detected faces into a batch
    faces_batch = torch.stack(detected_faces)
    print(f"Detected faces batch shape: {faces_batch.shape}")
    
    # Get embeddings
    embeddings = resnet(faces_batch.to('cuda' if torch.cuda.is_available() else 'cpu'))
    print(f"Embeddings shape: {embeddings.shape}")
    
    # Access individual embeddings
    for i, emb in enumerate(embeddings):
        print(f"Face {i} embedding: {emb.shape}")

    print("No faces detected in any image.")

Batch shape: (2, 250, 250, 3)
Number of results: 2, Type: <class 'list'>
Detected faces batch shape: torch.Size([2, 3, 160, 160])
Embeddings shape: torch.Size([2, 512])
Face 0 embedding: torch.Size([512])
Face 1 embedding: torch.Size([512])


In [78]:
import torch.nn as nn

class EmbeddingClassifier(nn.Module):
    def __init__(self, embedding_dim, num_classes):
        super(EmbeddingClassifier, self).__init__()
        self.fc1 = nn.Linear(embedding_dim, 1024)
        self.fc2 = nn.Linear(1024, num_classes)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)


    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x
    
num_classes = len(datas.labels)
model = EmbeddingClassifier(embedding_dim=512, num_classes=num_classes).to('cuda' if torch.cuda.is_available() else 'cpu')
criterion = nn.CrossEntropyLoss()   
optimizer = Adam(model.parameters(), lr=0.001)

In [9]:
len(loader)


180

2