## Tarea 1

Entrenamos un modelo para reconocimiento de grupos de edad

In [2]:
import os
import numpy as np
import cv2
from time import time
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import precision_score, recall_score, classification_report, confusion_matrix
from sklearn.svm import LinearSVC
from time import time
import joblib
from deepface import DeepFace
from collections import deque
import pandas as pd
from shutil import copyfile
from tqdm import tqdm
import json
import random

### Cargado de datos

Organizamos las imágenes en carpetas para poder procesarlas correctamente

In [None]:

# Hay que modificar las siguientes rutas según donde se tenga el dataset
csv_path = "C:/Users/lllrm/Downloads/dataset_VC/fairface_label_train.csv"
output_dir = "C:/Users/lllrm/Downloads/dataset_VC/ages"
image_path = "C:/Users/lllrm/Downloads/dataset_VC"

df = pd.read_csv(csv_path)

for _, row in df.iterrows():
    age_group = row['age']
    src = os.path.join(image_path, row['file'])
    dst = os.path.join(output_dir, age_group, os.path.basename(row['file']))

    os.makedirs(os.path.dirname(dst), exist_ok=True)

    if os.path.exists(src):
        copyfile(src, dst)

## Entrenamiento (Deepface embeddings)

Creación de embeddings de todas las imágenes de entrenamiento

In [None]:


folder = "C:/Users/lllrm/Downloads/dataset_VC/ages"

X_file = "X_embeddings.npy"
Y_file = "Y_embeddings.npy"
labels_file = "class_labels_embeddings.npy"

# En caso de que ya estén guuardados, cargamos los embeddings
if os.path.exists(X_file) and os.path.exists(Y_file):
    print("Loading cached embeddings...")
    X = np.load(X_file)
    Y = np.load(Y_file)
    classlabels = np.load(labels_file, allow_pickle=True)
    print("Loaded embeddings!")
else:
    X = []
    Y = []
    classlabels = []

    nclasses = 0
    for class_name in os.listdir(folder):
        class_folder = os.path.join(folder, class_name)
        if not os.path.isdir(class_folder):
            continue
        nclasses += 1
        classlabels.append(class_name)

        for file_name in tqdm(os.listdir(class_folder)):
            if not file_name.lower().endswith(".jpg"):
                continue
            img_path = os.path.join(class_folder, file_name)
            image = cv2.imread(img_path)

            # Extraemos el embedding con Deepface
            embedding = DeepFace.represent(img_path=img_path, model_name="Facenet512", enforce_detection=False)
            X.append(embedding[0]["embedding"])
            Y.append(nclasses - 1)

    X = np.array(X, dtype="float32")
    Y = np.array(Y, dtype="int64")

    # Guardamos en archivos para evitar la espera
    np.save(X_file, X)
    np.save(Y_file, Y)
    np.save(labels_file, np.array(classlabels))
    print("Saved embeddings!")


100%|██████████| 1792/1792 [09:47<00:00,  3.05it/s]
100%|██████████| 9103/9103 [54:26<00:00,  2.79it/s]  
100%|██████████| 11486/11486 [1:08:39<00:00,  2.79it/s]
100%|██████████| 10408/10408 [1:01:28<00:00,  2.82it/s]
100%|██████████| 10951/10951 [1:01:27<00:00,  2.97it/s]
100%|██████████| 10744/10744 [1:05:20<00:00,  2.74it/s]  
100%|██████████| 6228/6228 [30:05<00:00,  3.45it/s]
100%|██████████| 2779/2779 [13:25<00:00,  3.45it/s]
100%|██████████| 842/842 [04:05<00:00,  3.43it/s]


Saved embeddings!


División del dataset entre entrenamiento y validación

In [None]:
if not X or not Y:
    X = np.load("X_embeddings.npy")
    Y = np.load("Y_embeddings.npy")

X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42)

print("# samples in training set %d" % X_train.shape[0])
print("# samples in test set %d" % X_test.shape[0])

# samples in training set 60720
# samples in test set 26024


Entrenamiento del modelo con los embeddings obtenidos

In [None]:

model_svm = LinearSVC(C=5, dual=False, max_iter=5000)

X = np.load("X_embeddings.npy")

print("Training SVM on embeddings...")
t0 = time()
model_svm.fit(X_train, y_train)
print("Training done in %0.2f seconds" % (time() - t0))

joblib.dump(model_svm, "svm_deepface_embeddings.joblib")
print("Model saved!")

y_pred = model_svm.predict(X_test)
print(classification_report(y_test, y_pred, target_names=classlabels))
print("Precision: %0.3f, Recall: %0.3f" %
      (precision_score(y_test, y_pred, average="macro"),
       recall_score(y_test, y_pred, average="macro")))
print("Confusion matrix:\n", confusion_matrix(y_test, y_pred))


Training SVM on embeddings...
Training done in 1150.62 seconds
Model saved!
              precision    recall  f1-score   support

         0-2       0.68      0.26      0.38       358
       10-19       0.45      0.26      0.33      1821
       20-29       0.43      0.55      0.48      2297
         3-9       0.56      0.83      0.67      2082
       30-39       0.39      0.24      0.30      2190
       40-49       0.40      0.57      0.47      2149
       50-59       0.47      0.34      0.39      1246
       60-69       0.46      0.25      0.33       556
more than 70       0.57      0.07      0.13       168

    accuracy                           0.46     12867
   macro avg       0.49      0.38      0.39     12867
weighted avg       0.46      0.46      0.43     12867

Precision: 0.490, Recall: 0.375
Confusion matrix:
 [[  94    1    7  247    3    5    1    0    0]
 [   5  472  512  608  104  112    7    1    0]
 [   3  221 1266  232  305  261    8    1    0]
 [  24  138   91 1729   

Ejecucion final del modelo entrenado

In [4]:
# Expansión del recorte de la cara para mejor predicción
def expand_box(x, y, w, h, scale=1.3):
    cx = x + w // 2
    cy = y + h // 2
    new_w = int(w * scale)
    new_h = int(h * scale)
    new_x = max(0, cx - new_w // 2)
    new_y = max(0, cy - new_h // 2)
    return new_x, new_y, new_w, new_h

# Preprocesamiento de la cara antes de pasarla al modelo
def preprocess_face(face_crop):
    face_resized = cv2.resize(face_crop, (160, 160))

    face_yuv = cv2.cvtColor(face_resized, cv2.COLOR_BGR2YUV)
    face_yuv[:, :, 0] = cv2.equalizeHist(face_yuv[:, :, 0])
    face_norm = cv2.cvtColor(face_yuv, cv2.COLOR_YUV2BGR)

    return cv2.cvtColor(face_norm, cv2.COLOR_BGR2RGB)


def predict_age_from_face(face_crop):
    try:
        rgb = preprocess_face(face_crop)

        # Obtenemos el embedding de la cara
        embedding = DeepFace.represent(
            img_path=rgb,
            model_name="Facenet512",
            model=embedding_model,
            enforce_detection=False
        )

        # Para asegurar funcionamiento constante, forzamos la predicción si no hay embedding
        if embedding is None or len(embedding) == 0:
            print("No embedding, forcing prediction...")
            vec = np.random.rand(1, 512).astype("float32")
        else:
            vec = np.array(embedding[0]["embedding"]).reshape(1, -1)

        pred_class = model_svm.predict(vec)[0]
        return pred_class

    except:
        vec = np.random.rand(1, 512).astype("float32")
        pred_class = model_svm.predict(vec)[0]
        return pred_class

# Predecir con respecto a los últimos 12 frames 
pred_buffer = deque(maxlen=12)

# Elegimos la predicción más frecuente
def smooth_prediction(pred_class):
    pred_buffer.append(pred_class)
    return max(set(pred_buffer), key=pred_buffer.count)


print("Loading SVM model...")
model_svm = joblib.load('svm_deepface_embeddings.joblib')

print("Loading class labels...")
classlabels = np.load("class_labels_embeddings.npy", allow_pickle=True)

print("Loading Facenet embedding model...")
embedding_model = DeepFace.build_model("Facenet512")

print("All models loaded successfully.\n")


vid = cv2.VideoCapture(0)

while True:
    ret, frame = vid.read()
    frame = cv2.flip(frame, 1)

    if not ret:
        break

    try:
        # Detectamos las caras presentes en el frame
        faces = DeepFace.extract_faces(frame, detector_backend='opencv')
    except:
        faces = []

    for face in faces:
        area = face["facial_area"]
        x, y, w, h = area["x"], area["y"], area["w"], area["h"]
        x2, y2, w2, h2 = expand_box(x, y, w, h)

        # Obtenemos la parte del frame que contiene la cara para pasarsela al modelo
        face_crop = frame[y2:y2+h2, x2:x2+w2]

        # Predecimos la edad
        pred = predict_age_from_face(face_crop)
        # Intentamos coger la predicción más frecuente
        smooth_pred = smooth_prediction(pred)

        age_label = str(classlabels[smooth_pred])

        cv2.rectangle(frame, (x2, y2), (x2+w2, y2+h2), (0, 255, 0), 2)

        cv2.putText(frame, f"Age: {age_label}",
                    (x2, y2 - 10), cv2.FONT_HERSHEY_SIMPLEX,
                    0.9, (0, 255, 0), 2)

    cv2.imshow("Age Prediction", frame)

    if cv2.waitKey(1) & 0xFF == 27:  # ESC
        break

vid.release()
cv2.destroyAllWindows()


Loading SVM model...
Loading class labels...
Loading Facenet embedding model...
All models loaded successfully.



## Tarea 2

Implementación del "filtro"

In [None]:
def text(content, frame, y, color):
    font = cv2.FONT_HERSHEY_SIMPLEX
    thickness = 1
    max_width = frame.shape[1] - 40  
    scale = 0.7
 
 
    (text_width, text_height), baseline = cv2.getTextSize(content, font, scale, thickness)
    while text_width > max_width and scale > 0.1:
        scale -= 0.01
        (text_width, text_height), _ = cv2.getTextSize(content, font, scale, thickness)
 
    frame_width = frame.shape[1]
    x_text = (frame_width - text_width) // 2
    y_text = y
    cv2.putText(frame, content, (x_text, y_text), font, scale, color, thickness)

# Captura de la webcam
cap = cv2.VideoCapture(0)
 
x0 = 0
x_offset = 0
is_selected = None
new_question = False
answer = None
delay = 0
index_pregunta = 0
score = 0

preguntas = list()
with open("preguntas.json", "r", encoding="utf-8") as f:
    file = json.load(f)
    
preguntas = file["preguntas"]
pregunta = None
while True:
    # Lee el frame
    ret, frame = cap.read()
    frame = cv2.flip(frame, 1)  # Girarlo horizontalmente por el efecto espejo
    if not ret:
        break
 
    try:
        # Detección de cara
        faces = DeepFace.extract_faces(frame, detector_backend='opencv')

        if index_pregunta >= len(preguntas):
            text("Se han terminado las preguntas.", frame, 140, (0,0,0))
            porcentaje = int((score / len(preguntas)) * 100)
            text(f"Tu puntuacion es: {porcentaje}%", frame, 180, (0,0,0))
 
        # Dibuja una línea en el medio
        cv2.line(frame, (frame.shape[1]//2, 0), (frame.shape[1]//2, frame.shape[0]), (255, 0, 0), 1)
        if pregunta is None:
            pregunta = preguntas[index_pregunta]
        if new_question:
            index_pregunta += 1
            new_question = False
        if index_pregunta < len(preguntas):
            pregunta = preguntas[index_pregunta]
        else:
            pregunta = None
       
        # Muestra el texto de las pregunta
        font = cv2.FONT_HERSHEY_SIMPLEX
        thickness = 1
        max_width = frame.shape[1] - 40  
        scale = 0.7
 
        left_text = "verdadero"
        right_text = "falso"

        (p_width, p_height), p_baseline = cv2.getTextSize(pregunta["pregunta"], font, scale, thickness)

        cv2.rectangle(frame, (0, 0), (frame.shape[1], p_height + p_baseline + 50), (255, 255, 255), -1)
 
        text(pregunta["pregunta"], frame, 50, (0,0,0))

        (left_width, left_height), left_baseline = cv2.getTextSize(left_text, font, scale, thickness)
        (right_width, right_height), right_baseline = cv2.getTextSize(right_text, font, scale, thickness)
 
        
        # Muestra la opción 'True'
        padding = 10
        x_left = 10
        (left_width, left_height), left_baseline = cv2.getTextSize(left_text, font, scale, thickness)
        top_left = frame.shape[0] - 20 - left_height - padding // 2
        bottom_left = frame.shape[0] - 20 + left_baseline + padding // 2
        cv2.rectangle(frame, (x_left - padding, top_left), (x_left + left_width + padding, bottom_left), (0, 0, 0), -1)
        cv2.putText(frame, left_text, (x_left, frame.shape[0] - 20), font, scale, (0, 255, 0), thickness)
 
        # Muestra la opción 'False'
        x_right = frame.shape[1] - right_width - 10
        (right_width, right_height), right_baseline = cv2.getTextSize(right_text, font, scale, thickness)
        top_right = frame.shape[0] - 20 - right_height - padding // 2
        bottom_right = frame.shape[0] - 20 + right_baseline + padding // 2
        cv2.rectangle(frame, (x_right - padding, top_right), (x_right + right_width + padding, bottom_right), (0, 0, 0), -1)
        cv2.putText(frame, right_text, (x_right, frame.shape[0] - 20), font, scale, (0, 0, 255), thickness)

        # Imprime la puntuación
        score_text = f"Puntos: {score}"
        cv2.putText(frame, score_text, (x_left, 100), font, scale, (0, 0, 0), 2)

        # Enseña respuesta correcta/incorrecta
        if answer == 0:
            text("Respuesta correcta", frame, 100, (0, 255, 0))
            delay += 1
        elif answer == 1:
            text("Respuesta incorrecta", frame, 100, (0, 0, 255))
            delay += 1
        elif answer == 2:
            text("Respuesta correcta", frame, 100, (0, 255, 0))
            delay += 1
        elif answer == 3:
            text("Respuesta incorrecta", frame, 100, (0, 0, 255))
            delay += 1
        
        if delay >= 25:
            answer = None
            delay = 0
 
        frame_width = frame.shape[1]
 
        # Dibuja el cuadro delimitador de la cara y las ubicaciones de los ojos
        for face in faces:
            x, y, w, h = face['facial_area']['x'], face['facial_area']['y'], face['facial_area']['w'], face['facial_area']['h']
            # Guarda la posición inicial para el primer frame
            if x0 == 0:
                x0 = frame_width // 2
            # Izquierda
            elif x_offset < -(0.1*frame_width):
                # Se considera que la persona ha inclinado la cabeza hacia la izquierda
                x_offset = 0
                # Dibujar un rectángulo sin relleno
                cv2.rectangle(frame, (x_left - padding, top_left), (x_left + left_width + padding, bottom_left), (255, 255, 255), 2)
                is_selected = 0
            # Derecha
            elif x_offset > (0.1*frame_width):
                # Se considera que la persona ha inclinado la cabeza hacia la derecha
                x_offset = 0   
                cv2.rectangle(frame, (x_right - padding, top_right), (x_right + right_width + padding, bottom_right), (255, 255, 255), 2) 
                is_selected = 1
            else:
                x_offset = 0
                if is_selected is not None:
                    # Mostramos la respuesta correcta
                    if is_selected == 0 and pregunta["respuesta_correcta"] == "Verdadero":
                        answer = 0
                        score += 1
                    elif is_selected == 0 and pregunta["respuesta_correcta"] == "Falso":
                        answer = 1
                    elif is_selected == 1 and pregunta["respuesta_correcta"] == "Falso":
                        answer = 2
                        score += 1
                    elif is_selected == 1 and pregunta["respuesta_correcta"] == "Verdadero":
                        answer = 3
                    is_selected = None
                    new_question = True                    
            x_offset += (x + w//2) - x0
            cv2.rectangle(frame, (x, y), (x + w, y + h), (200, 255, 200), 2)
 
    except Exception as e:
        print("Error:", e)
    cv2.imshow("Video", frame)
 
    # Salir si se presiona Esc o cambia el detectr
    key = cv2.waitKey(1) & 0xFF
    if key == 27:  # Salir si se presiona 'q'
        break
 
# Liberar la captura y cerrar ventanas
cap.release()
cv2.destroyAllWindows()

Error: Face could not be detected in numpy array.Please confirm that the picture is a face photo or consider to set enforce_detection param to False.
Error: Face could not be detected in numpy array.Please confirm that the picture is a face photo or consider to set enforce_detection param to False.
Error: Face could not be detected in numpy array.Please confirm that the picture is a face photo or consider to set enforce_detection param to False.
Error: Face could not be detected in numpy array.Please confirm that the picture is a face photo or consider to set enforce_detection param to False.
Error: Face could not be detected in numpy array.Please confirm that the picture is a face photo or consider to set enforce_detection param to False.
Error: Face could not be detected in numpy array.Please confirm that the picture is a face photo or consider to set enforce_detection param to False.
Error: Face could not be detected in numpy array.Please confirm that the picture is a face photo or 