# Detección y caracterización de caras

### Entrenamiento para detección de attributos

In [11]:
import torch
import torch.nn as nn
from sympy import horner
from sympy.physics.vector import get_motion_params
from torchvision import models, transforms, datasets
from torch.utils.data import DataLoader

EPOCHS = 35
DATASET_BASE_DIRECTORY = "../emotion-recognition-dataset/"
BATCH_SIZE = 32
NUM_CLASSES = 8
last_accuracy = 0.5844

# --- Data transforms ---
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
])

# Load your dataset
train_dataset = datasets.ImageFolder(DATASET_BASE_DIRECTORY + "train", transform=transform)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

val_dataset = datasets.ImageFolder(DATASET_BASE_DIRECTORY + "val", transform=transform)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

# --- Load EfficientNet ---
weights = models.EfficientNet_B2_Weights.IMAGENET1K_V1
emotion_model = models.efficientnet_b2(weights=weights)
# Replace classifier for 8 emotion classes
num_features = emotion_model.classifier[1].in_features
emotion_model.classifier[1] = nn.Linear(num_features, NUM_CLASSES)


for param in emotion_model.features[:5].parameters():  # adjust slice as needed
    param.requires_grad = False

# Move to GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
emotion_model = emotion_model.to(device)

# --- Loss & optimizer ---
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(emotion_model.parameters(), lr=1e-4)

# --- Training loop (simplified) ---
for epoch in range(EPOCHS):
    emotion_model.train()
    for imgs, labels in train_loader:
        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = emotion_model(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch} done")


# --- Validation loop ---
emotion_model.eval()  # set model to evaluation mode
running_loss = 0.0
correct = 0
total = 0

with torch.no_grad():  # disable gradient computation
    for imgs, labels in val_loader:
        imgs, labels = imgs.to(device), labels.to(device)
        outputs = emotion_model(imgs)
        loss = criterion(outputs, labels)
        running_loss += loss.item() * imgs.size(0)

        # Compute number of correct predictions
        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

avg_loss = running_loss / total
accuracy = correct / total

print(f"Validation Loss: {avg_loss:.4f}, Accuracy: {accuracy*100:.2f}%")

if accuracy > last_accuracy:
    torch.save(emotion_model.state_dict(), "emotion-efficientnet-weights.pth")


Epoch 0 done
Epoch 1 done
Epoch 2 done
Epoch 3 done
Epoch 4 done
Epoch 5 done
Epoch 6 done
Epoch 7 done
Epoch 8 done
Epoch 9 done
Epoch 10 done
Epoch 11 done
Epoch 12 done
Epoch 13 done
Epoch 14 done
Epoch 15 done
Epoch 16 done
Epoch 17 done
Epoch 18 done
Epoch 19 done
Epoch 20 done
Epoch 21 done
Epoch 22 done
Epoch 23 done
Epoch 24 done
Epoch 25 done
Epoch 26 done
Epoch 27 done
Epoch 28 done
Epoch 29 done
Epoch 30 done
Epoch 31 done
Epoch 32 done
Epoch 33 done
Epoch 34 done
Validation Loss: 1.9946, Accuracy: 60.50%


### Filtro de emociones

In [45]:
from abc import ABC, abstractmethod
import cv2
from deepface import DeepFace


class Filter(ABC):
    @abstractmethod
    def apply(self, img, face_x, face_y, face_w, face_h):
        pass

class HappyFilter(Filter):
    def __init__(self, halo_image_path):
        self.halo_overlay = cv2.imread(halo_image_path, cv2.IMREAD_UNCHANGED)

    def apply(self, img, face_x, face_y, face_w, face_h):
        offset_y = int(0.5 * face_h)  # adjust 0.5 to move more or less
        halo_width = int(face_w * 1.5)
        halo_height = int(face_h * 0.3)  # or resize proportionally

        halo_x = max(int(face_x - face_w / 4), 0)
        halo_y = max(face_y - offset_y, 0)  # make sure we don’t go negative
        halo_resized = cv2.resize(self.halo_overlay, (halo_width, halo_height))
        halo_rgb = halo_resized[:, :, :3]
        alpha_mask = halo_resized[:, :, 3] / 255.0

        roi = img[halo_y:halo_y + halo_height, halo_x:halo_x + halo_width]

        for c in range(3):
            roi[:, :, c] = (alpha_mask * halo_rgb[:, :, c] + (1 - alpha_mask) * roi[:, :, c])

        img[halo_y:halo_y + halo_height, halo_x:halo_x + halo_width] = roi

class AngryFilter(Filter):
    def __init__(self, demon_img_path):
        self.demon_overlay = cv2.imread(demon_img_path, cv2.IMREAD_UNCHANGED)

    def apply(self, img, face_x, face_y, face_w, face_h):
        offset_y = int(face_h*0.2)  # adjust 0.5 to move more or less
        horns_width = face_w
        horns_height = int(face_h * 0.4)  # or resize proportionally

        horns_x = face_x
        horns_y = max(face_y - offset_y, 0)  # make sure we don’t go negative
        horns_resized = cv2.resize(self.demon_overlay, (horns_width, horns_height))
        horns_rgb = horns_resized[:, :, :3]
        alpha_mask = horns_resized[:, :, 3] / 255.0

        roi = img[horns_y:horns_y + horns_height, horns_x:horns_x + horns_width]

        for c in range(3):
            roi[:, :, c] = (alpha_mask * horns_rgb[:, :, c] + (1 - alpha_mask) * roi[:, :, c])

        img[horns_y:horns_y + horns_height, horns_x:horns_x + horns_width] = roi

class NoFilter(Filter):
    def apply(self, img, face_x, face_y, face_w, face_h):
        pass

# happy, angry, fear, neutral, sad, disgust, surprise
filters = {
    "happy": HappyFilter("resources/images/aureola.png"),
    "angry": AngryFilter("resources/images/devil-horns.png"),
}

def predict_emotion(cropped_face):
    cropped_face_rgb = cv2.cvtColor(cropped_face, cv2.COLOR_BGR2RGB)
    objs = DeepFace.analyze(cropped_face_rgb, actions = ['emotion'], enforce_detection=False)
    return objs[0]["dominant_emotion"]


cap = cv2.VideoCapture(0)
ret, frame = cap.read()
key = 0

while ret and key != 27:
    try:
        for face in DeepFace.extract_faces(frame, detector_backend="yolov8", enforce_detection=False):
            x, y, w, h = face['facial_area']['x'], face['facial_area']['y'], face['facial_area']['w'], face['facial_area']['h']
            filters.get(
                predict_emotion(frame[y:y + h, x:x + w]),
                NoFilter()
            ).apply(frame, x, y, w, h)

    except Exception as e:
        print("Error:", e)

    cv2.imshow("Video", frame)
    ret, frame = cap.read()
    key = cv2.waitKey(1) & 0xFF

# Liberar la captura y cerrar ventanas
cap.release()
cv2.destroyAllWindows()