In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from facenet_pytorch import InceptionResnetV1
from PIL import Image
import os
import onnx

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
transform = transforms.Compose([
    transforms.Resize((160, 160)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

In [3]:
dataset = datasets.ImageFolder(
    root="../img-example",
    transform=transform
)

In [4]:
root = "../img-example"
for folder in os.listdir(root):
    folder_path = os.path.join(root, folder)
    for file in os.listdir(folder_path):
        file_path = os.path.join(folder_path, file)
        try:
            with Image.open(file_path) as img:
                img.verify()
        except (IOError, SyntaxError) as e:
            print(f"Gambar rusak: {file_path} - {e}")
            os.remove(file_path)

In [5]:
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

In [6]:
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [7]:
print(f"Jumlah data training: {len(train_dataset)}")
print(f"Jumlah data validasi: {len(val_dataset)}")

Jumlah data training: 5135
Jumlah data validasi: 1284


In [8]:
num_classes = len(dataset.classes)  
model = InceptionResnetV1(pretrained="vggface2")
model.last_linear = nn.Linear(model.last_linear.in_features, num_classes)
model.last_bn = nn.BatchNorm1d(num_classes)
model = model.to("cuda" if torch.cuda.is_available() else "cpu")

In [9]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [10]:
num_epochs = 5
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

for epoch in range(num_epochs):
    # Training
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    train_loss = running_loss / len(train_loader)
    
    # Validation
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    val_loss = val_loss / len(val_loader)
    val_accuracy = 100 * correct / total
    
    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%")

Epoch 1/5, Train Loss: 7.8431, Val Loss: 7.8200, Val Accuracy: 2.18%
Epoch 2/5, Train Loss: 7.8217, Val Loss: 7.8115, Val Accuracy: 3.27%
Epoch 3/5, Train Loss: 7.8141, Val Loss: 7.8117, Val Accuracy: 3.43%
Epoch 4/5, Train Loss: 7.8071, Val Loss: 7.8038, Val Accuracy: 3.58%
Epoch 5/5, Train Loss: 7.8037, Val Loss: 7.7905, Val Accuracy: 4.91%


In [11]:
torch.save(model.state_dict(), "../app/models/facenet_finetuned.pth")

In [12]:
print(f"Input image size: {images.shape}")
print(f"Model output size: {outputs.shape}")

Input image size: torch.Size([4, 3, 160, 160])
Model output size: torch.Size([4, 2598])


In [13]:
class EmbeddingModel(torch.nn.Module):
    def __init__(self, model):
        super(EmbeddingModel, self).__init__()
        self.model = model

    def forward(self, x):
        x = self.model.logits(x)  
        return x

In [14]:
model.load_state_dict(torch.load("../app/models/facenet_finetuned.pth"), strict=False)
embedding_model = EmbeddingModel(model)
embedding_model.eval()

EmbeddingModel(
  (model): InceptionResnetV1(
    (conv2d_1a): BasicConv2d(
      (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
      (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU()
    )
    (conv2d_2a): BasicConv2d(
      (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU()
    )
    (conv2d_2b): BasicConv2d(
      (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU()
    )
    (maxpool_3a): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv2d_3b): BasicConv2d(
      (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True,