In [1]:
# Imports principais
import os
import cv2
import torch
import numpy as np
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from facenet_pytorch import MTCNN, InceptionResnetV1
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from PIL import Image
from tqdm import tqdm

In [2]:
# Configurações globais
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
DATA_DIR = 'data/dataset'  # Estrutura: data/dataset/<person_name>/*.jpg
EMBED_FILE = 'data/train_embeddings.pkl'
MODEL_FILE = 'models/classifier.joblib'

os.makedirs('data/dataset', exist_ok=True)
os.makedirs('models', exist_ok=True)

In [3]:
# Extração de embeddings
mtcnn = MTCNN(image_size=160, margin=20, keep_all=False, device=DEVICE)
resnet = InceptionResnetV1(pretrained='vggface2').eval().to(DEVICE)

def extract_embeddings():
    embeddings, labels = [], []
    for person in sorted(os.listdir(DATA_DIR)):
        person_dir = os.path.join(DATA_DIR, person)
        if not os.path.isdir(person_dir):
            continue
        for file in tqdm(os.listdir(person_dir), desc=person):
            path = os.path.join(person_dir, file)
            try:
                img = Image.open(path).convert('RGB')
            except:
                continue
            face = mtcnn(img)
            if face is None:
                continue
            with torch.no_grad():
                face = face.unsqueeze(0).to(DEVICE)
                emb = resnet(face).cpu().numpy()[0]
            embeddings.append(emb)
            labels.append(person)
    data = {"embeddings": np.array(embeddings), "labels": np.array(labels)}
    joblib.dump(data, EMBED_FILE)
    print(f"Embeddings salvos em {EMBED_FILE}")

extract_embeddings()

  state_dict = torch.load(state_dict_path)
  state_dict = torch.load(state_dict_path)
  state_dict = torch.load(state_dict_path)
  state_dict = torch.load(cached_file)


Embeddings salvos em data/train_embeddings.pkl


In [4]:
# Treinamento do classificador SVM
def train_classifier():
    data = joblib.load(EMBED_FILE)
    X, y = data['embeddings'], data['labels']

    le = LabelEncoder()
    y_enc = le.fit_transform(y)

    X_train, X_val, y_train, y_val = train_test_split(
        X, y_enc, test_size=0.15, random_state=42, stratify=y_enc
    )

    clf = SVC(kernel='linear', probability=True)
    clf.fit(X_train, y_train)

    preds = clf.predict(X_val)
    print("Acurácia:", accuracy_score(y_val, preds))
    print(classification_report(y_val, preds, target_names=le.classes_))

    joblib.dump({"model": clf, "label_encoder": le}, MODEL_FILE)
    print(f"Modelo salvo em {MODEL_FILE}")

train_classifier()

ValueError: With n_samples=0, test_size=0.15 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.