In [1]:
# import os
# from PIL import Image
# import pillow_heif
# 
# # pillow_heif.register_heif_opener()
# 
# def heic_to_jpg(input_path, output_path):
#     image = Image.open(input_path)
#     image.save(output_path, "JPEG")
# 
# def delete_all_heic_in_folder(folder_path):
#     if not os.path.isdir(folder_path):
#         print(f"Error: Folder '{folder_path}' does not exist.")
#         return
# 
#     count = 0
#     for filename in os.listdir(folder_path):
#         if filename.lower().endswith(".heic"):
#             file_path = os.path.join(folder_path, filename)
#             try:
#                 os.remove(file_path)
#                 print(f"Deleted: {filename}")
#                 count += 1
#             except Exception as e:
#                 print(f"Failed to delete {filename}: {e}")
#     
#     print(f"Total .heic files deleted: {count}")
# 
# def convert_all_heic_in_folder(input_folder, output_folder=None):
#     if not os.path.isdir(input_folder):
#         print(f"Error: Input folder '{input_folder}' does not exist.")
#         return
# 
#     if output_folder is None:
#         output_folder = input_folder
# 
#     os.makedirs(output_folder, exist_ok=True)
# 
#     for filename in os.listdir(input_folder):
#         if filename.lower().endswith(".heic"):
#             input_path = os.path.join(input_folder, filename)
#             output_filename = os.path.splitext(filename)[0] + ".jpg"
#             output_path = os.path.join(output_folder, output_filename)
# 
#             try:
#                 heic_to_jpg(input_path, output_path)
#                 print(f"Converted: {filename} → {output_filename}")
#             except Exception as e:
#                 print(f"Failed to convert {filename}: {e}")
# 
# # delete_all_heic_in_folder("./indep_data/kuharic")

# Setup

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image
import os
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from facenet_pytorch import InceptionResnetV1

  from .autonotebook import tqdm as notebook_tqdm


# Data manipulation

In [3]:
class FaceDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform
    
    def __len__(self):
        return len(self.image_paths)
    
    def __getitem__(self, idx):
        image = Image.open(self.image_paths[idx]).convert('RGB')
        label = self.labels[idx]
        
        if self.transform:
            image = self.transform(image)
        
        return image, label


In [4]:
class RealVGGFace2Model(nn.Module):
    def __init__(self, num_classes, pretrained='vggface2'):
        super(RealVGGFace2Model, self).__init__()
        self.backbone = InceptionResnetV1(pretrained=pretrained)
        feature_dim = 512

        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(feature_dim, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, num_classes)
        )

        for param in self.backbone.parameters():
            param.requires_grad = False
    
    def forward(self, x):
        features = self.backbone(x)
        output = self.classifier(features)
        return output
    
    def get_embeddings(self, x):
        with torch.no_grad():
            embeddings = self.backbone(x)
        return embeddings
    
    def unfreeze_backbone(self):
        for param in self.backbone.parameters():
            param.requires_grad = True

In [5]:
def get_vggface2_transforms():
    train_transform = transforms.Compose([
        transforms.Resize((160, 160)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(degrees=10),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # [-1, 1] normalization
    ])
    
    val_transform = transforms.Compose([
        transforms.Resize((160, 160)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    ])
    
    return train_transform, val_transform

In [6]:
def load_face_dataset(data_dir):
    image_paths = []
    labels = []
    class_names = []
    
    for idx, person_folder in enumerate(sorted(os.listdir(data_dir))):
        person_path = os.path.join(data_dir, person_folder)
        if os.path.isdir(person_path):
            class_names.append(person_folder)
            for img_file in os.listdir(person_path):
                if img_file.lower().endswith(('.png', '.jpg', '.jpeg')):
                    image_paths.append(os.path.join(person_path, img_file))
                    labels.append(idx)
    
    return image_paths, labels, class_names

# Model Help Func

In [7]:
def train_vggface2_model(model, train_loader, val_loader, num_epochs=20, lr=0.001):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    
    optimizer = optim.Adam(model.classifier.parameters(), lr=lr, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
    
    print("Phase 1: Training classifier only (backbone frozen)")
    train_losses, val_accuracies = [], []
    
    for epoch in range(num_epochs // 2):
        model.train()
        running_loss = 0.0
        
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
        
        model.eval()
        correct = 0
        total = 0
        
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        
        val_accuracy = 100 * correct / total
        epoch_loss = running_loss / len(train_loader)
        
        train_losses.append(epoch_loss)
        val_accuracies.append(val_accuracy)
        
        print(f'Epoch [{epoch+1}/{num_epochs//2}], Loss: {epoch_loss:.4f}, Val Acc: {val_accuracy:.2f}%')
        scheduler.step()
    print("\nPhase 2: Fine-tuning entire model (backbone unfrozen)")
    model.unfreeze_backbone()
    optimizer = optim.Adam(model.parameters(), lr=lr/10, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
    for epoch in range(num_epochs // 2, num_epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
        
        model.eval()
        correct = 0
        total = 0
        
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        
        val_accuracy = 100 * correct / total
        epoch_loss = running_loss / len(train_loader)
        
        train_losses.append(epoch_loss)
        val_accuracies.append(val_accuracy)
        
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Val Acc: {val_accuracy:.2f}%')
        scheduler.step()
    
    return train_losses, val_accuracies

In [8]:
def compare_faces_vggface2(model, img1_path, img2_path, threshold=0.6):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.eval()
    
    _, val_transform = get_vggface2_transforms()
    
    try:
        img1 = Image.open(img1_path).convert('RGB')
        img2 = Image.open(img2_path).convert('RGB')
        
        img1_tensor = val_transform(img1).unsqueeze(0).to(device)
        img2_tensor = val_transform(img2).unsqueeze(0).to(device)
        
        with torch.no_grad():
            emb1 = model.get_embeddings(img1_tensor)
            emb2 = model.get_embeddings(img2_tensor)
            similarity = torch.nn.functional.cosine_similarity(emb1, emb2)
            similarity_score = similarity.item()
            
            is_same_person = similarity_score > threshold
            
        return is_same_person, similarity_score
        
    except Exception as e:
        print(f"Error comparing images: {e}")
        return False, 0.0

In [9]:
def main_vggface2_training():
    DATA_DIR = "./indep_data"
    BATCH_SIZE = 8
    NUM_EPOCHS = 20
    LEARNING_RATE = 0.001
    
    print("Loading dataset...")
    image_paths, labels, class_names = load_face_dataset(DATA_DIR)
    print(f"Found {len(image_paths)} images across {len(class_names)} people")
    print(f"People: {class_names}")
    
    train_paths, val_paths, train_labels, val_labels = train_test_split(
        image_paths, labels, test_size=0.2, stratify=labels, random_state=42
    )
    
    train_transform, val_transform = get_vggface2_transforms()
    train_dataset = FaceDataset(train_paths, train_labels, train_transform)
    val_dataset = FaceDataset(val_paths, val_labels, val_transform)
    
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

    num_classes = len(class_names)
    model = RealVGGFace2Model(num_classes=num_classes, pretrained='vggface2')
    
    print(f"Training REAL VGG-Face2 model with {num_classes} classes...")
    print("This model is actually pretrained on VGG-Face2 dataset!")
    
    train_losses, val_accuracies = train_vggface2_model(
        model, train_loader, val_loader, 
        num_epochs=NUM_EPOCHS, lr=LEARNING_RATE
    )
    
    torch.save({
        'model_state_dict': model.state_dict(),
        'class_names': class_names,
        'num_classes': num_classes
    }, 'real_vggface2_model.pth')
    
    print("REAL VGG-Face2 model saved as 'real_vggface2_model.pth'")
    
    return model, class_names

In [10]:
def load_real_vggface2_model():
    checkpoint = torch.load('real_vggface2_model.pth', map_location='cpu')
    
    num_classes = checkpoint['num_classes']
    class_names = checkpoint['class_names']
    
    model = RealVGGFace2Model(num_classes=num_classes, pretrained='vggface2')
    model.load_state_dict(checkpoint['model_state_dict'])
    
    return model, class_names

# Running it all

In [11]:
model, class_names = main_vggface2_training()


# Example usage:
# img1_path = "./indep_data/person1/photo1.jpg"
# img2_path = "./indep_data/person2/photo1.jpg"
# is_same, similarity = compare_faces_vggface2(model, img1_path, img2_path)
# print(f"Same person: {is_same}, VGG-Face2 similarity: {similarity:.3f}")


Loading dataset...
Found 276 images across 2 people
People: ['kuharic', 'loknar']


100%|██████████| 107M/107M [00:43<00:00, 2.58MB/s] 


Training REAL VGG-Face2 model with 2 classes...
This model is actually pretrained on VGG-Face2 dataset!
Phase 1: Training classifier only (backbone frozen)
Epoch [1/10], Loss: 0.5895, Val Acc: 80.36%
Epoch [2/10], Loss: 0.3275, Val Acc: 91.07%
Epoch [3/10], Loss: 0.2493, Val Acc: 91.07%
Epoch [4/10], Loss: 0.1919, Val Acc: 98.21%
Epoch [5/10], Loss: 0.1885, Val Acc: 94.64%
Epoch [6/10], Loss: 0.1496, Val Acc: 98.21%
Epoch [7/10], Loss: 0.1908, Val Acc: 87.50%
Epoch [8/10], Loss: 0.2038, Val Acc: 94.64%
Epoch [9/10], Loss: 0.1477, Val Acc: 94.64%
Epoch [10/10], Loss: 0.1349, Val Acc: 94.64%

Phase 2: Fine-tuning entire model (backbone unfrozen)
Epoch [11/20], Loss: 0.1091, Val Acc: 100.00%
Epoch [12/20], Loss: 0.0497, Val Acc: 100.00%
Epoch [13/20], Loss: 0.0639, Val Acc: 94.64%
Epoch [14/20], Loss: 0.1254, Val Acc: 100.00%
Epoch [15/20], Loss: 0.0225, Val Acc: 98.21%
Epoch [16/20], Loss: 0.0108, Val Acc: 100.00%
Epoch [17/20], Loss: 0.0393, Val Acc: 100.00%
Epoch [18/20], Loss: 0.0232,

# Example usage for face verification

In [12]:
def direct_vggface2_comparison(img1_path, img2_path, threshold=0.6):
    model = InceptionResnetV1(pretrained='vggface2').eval()
    
    _, transform = get_vggface2_transforms()

    img1 = transform(Image.open(img1_path).convert('RGB')).unsqueeze(0)
    img2 = transform(Image.open(img2_path).convert('RGB')).unsqueeze(0)
    
    with torch.no_grad():
        emb1 = model(img1)
        emb2 = model(img2)
        
        similarity = torch.nn.functional.cosine_similarity(emb1, emb2).item()
        is_same = similarity > threshold
    
    return is_same, similarity


In [None]:
def quick_vggface2_compare(img1_path, img2_path):
    """Quick comparison using pure VGG-Face2 (no training needed)"""
    is_same, similarity = direct_vggface2_comparison(img1_path, img2_path)
    print(f"VGG-Face2 comparison:")
    print(f"Same person: {is_same}")
    print(f"Similarity: {similarity:.3f}")

: 