#  Good Seed Face recognition

La cadena de supermercados Good Seed quiere asegurarse de no vender alcohol a personas menores de edad.
El proyecto se basa en construir y evaluar un modelo para verificar la edad de las personas.

## 0. Inicialización

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Flatten, Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.applications.resnet import ResNet50
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

## 1. EDA

In [None]:
def eda(path):
    """Print summary statistics and plot an overview of the labelled face dataset.

    Reads ``labels.csv`` from ``path``, prints the frame's ``info()`` and
    ``describe()`` reports, draws a histogram and a boxplot of the
    ``real_age`` column, and finally shows a 3x3 grid with up to nine
    randomly chosen images read from ``<path>/final_files``.

    Args:
        path: Directory that contains ``labels.csv`` (with the columns
            ``file_name`` and ``real_age``) and the ``final_files`` folder.

    Returns:
        None. The function only prints and displays figures.
    """
    csv_path = os.path.join(path, 'labels.csv')
    df = pd.read_csv(csv_path)

    print("\n------ Información del dataset ------")
    # DataFrame.info() writes its report to stdout and returns None, so
    # wrapping it in print() would append a stray "None" line.
    df.info()

    print("\n------ Estadísticas descriptivas ------")
    print(df.describe())

    # Age histogram
    plt.figure(figsize=(16, 8))
    sns.histplot(df['real_age'], bins=30, kde=True)
    plt.title('Distribución de edades reales')
    plt.xlabel('Edad')
    plt.ylabel('Frecuencia')
    plt.show()

    # Age boxplot
    plt.figure(figsize=(16, 8))
    sns.boxplot(x=df['real_age'])
    plt.title('Boxplot de edades')
    plt.show()

    # Random sample of images
    print("\n------ Muestra de imágenes ------")
    # Clamp the sample size: DataFrame.sample raises when asked for more
    # rows than the frame holds (and replace=False). With nine or more rows
    # the selection is identical to a plain sample(9, random_state=42).
    sample = df.sample(min(9, len(df)), random_state=42)
    plt.figure(figsize=(10, 10))

    for i, row in enumerate(sample.itertuples(), 1):
        img_path = os.path.join(path, 'final_files', row.file_name)
        img = plt.imread(img_path)
        plt.subplot(3, 3, i)
        plt.imshow(img)
        plt.title(f"Edad: {row.real_age}")
        plt.axis('off')
    plt.tight_layout()
    plt.show()

## 2. Cargar datos

In [None]:
def load_data(path):
    """Build the training and validation image flows for age regression.

    Reads ``labels.csv`` from ``path`` and creates two Keras dataframe
    iterators over the images in ``<path>/final_files``. 25% of the rows
    are reserved for validation. Images are resized to 224x224, scaled to
    the [0, 1] range and served in batches of 32 with the raw ``real_age``
    value as the target.

    Random flips are applied to the training flow only. The validation flow
    uses a separate generator that just rescales, so validation metrics are
    measured on unmodified images and stay comparable between epochs.

    Args:
        path: Directory that contains ``labels.csv`` (columns ``file_name``
            and ``real_age``) and the ``final_files`` image folder.

    Returns:
        tuple: ``(train_datagen_flow, val_datagen_flow)``.
    """
    labels = pd.read_csv(os.path.join(path, 'labels.csv'))
    images_dir = os.path.join(path, 'final_files')

    # Both generators must declare the same validation_split so that the
    # 'training' and 'validation' subsets partition the dataframe the same
    # way and do not overlap.
    validation_split = 0.25

    # NOTE(review): vertical_flip produces upside-down faces, which is an
    # unusual augmentation for portraits; kept as in the original training
    # setup, consider dropping it.
    train_datagen = ImageDataGenerator(
        rescale=1./255,
        validation_split=validation_split,
        horizontal_flip=True,
        vertical_flip=True
    )

    # No augmentation here: only the same rescaling as the training images.
    val_datagen = ImageDataGenerator(
        rescale=1./255,
        validation_split=validation_split
    )

    # Options shared by both flows.
    flow_options = dict(
        dataframe=labels,
        directory=images_dir,
        x_col='file_name',
        y_col='real_age',
        target_size=(224, 224),
        batch_size=32,
        class_mode='raw',
        seed=12345
    )

    train_datagen_flow = train_datagen.flow_from_dataframe(
        subset='training',
        **flow_options
    )

    val_datagen_flow = val_datagen.flow_from_dataframe(
        subset='validation',
        **flow_options
    )

    return train_datagen_flow, val_datagen_flow

## 3. Crear modelo

In [None]:

def create_model(input_shape, trainable_layers=None, learning_rate=0.0001):
    """Create and compile a ResNet50-based regression model that predicts age.

    The network is an ImageNet-pretrained ResNet50 without its classification
    head, followed by global average pooling, a 256-unit ReLU layer, dropout
    (0.5) and a single ReLU output unit. It is compiled with Adam, MSE loss
    and MAE as an additional metric.

    Args:
        input_shape: Shape of one input image, e.g. ``(224, 224, 3)``.
        trainable_layers: How many of the last backbone layers to fine-tune.
            ``None`` (default) leaves every backbone layer trainable, which
            is what the previous implementation effectively did. An integer
            ``n`` freezes all backbone layers except the last ``n``
            (``0`` freezes the whole backbone).
        learning_rate: Learning rate for the Adam optimizer.

    Returns:
        The compiled Keras ``Sequential`` model.

    Raises:
        ValueError: If ``trainable_layers`` is negative.
    """
    if trainable_layers is not None and trainable_layers < 0:
        raise ValueError("trainable_layers must be None or a non-negative integer")

    backbone = ResNet50(input_shape=input_shape,
                        weights='imagenet', include_top=False)

    # Keras layers are trainable by default, so partial fine-tuning has to be
    # expressed by freezing the leading layers; marking the trailing layers as
    # trainable (as the earlier code did) changes nothing on its own.
    if trainable_layers is not None:
        n_frozen = max(len(backbone.layers) - trainable_layers, 0)
        for layer in backbone.layers[:n_frozen]:
            layer.trainable = False

    # Regression head on top of the convolutional backbone.
    model = Sequential([
        backbone,
        GlobalAveragePooling2D(),
        Dense(256, activation='relu'),
        Dropout(0.5),
        # ReLU keeps the predicted age non-negative.
        Dense(1, activation='relu')
    ])

    # Optimise MSE and report MAE alongside it.
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

    return model

## 4. Entrenar modelo

In [None]:
def train_model(model, train_data, val_data, batch_size=32, epochs=3):
    """Fit ``model`` and plot its training curves.

    Training uses early stopping on ``val_loss`` (patience 3, best weights
    restored) and saves the best model seen so far to ``best_model.keras``.
    After fitting, the loss curves are plotted, plus the MAE curves when the
    model reports an ``mae`` metric.

    Args:
        model: Compiled Keras model.
        train_data: Batch-yielding training data (generator, Keras sequence
            or dataset) that provides both inputs and targets.
        val_data: Validation data in the same form as ``train_data``.
        batch_size: Kept for backward compatibility and not forwarded to
            ``fit``; the batch size is defined by the data iterators.
        epochs: Maximum number of training epochs. Early stopping
            (patience 3) can only trigger when this is larger than 3.

    Returns:
        The Keras ``History`` object returned by ``model.fit``.
    """
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=3,
                      restore_best_weights=True),
        # Same monitored quantity as early stopping, stated explicitly.
        ModelCheckpoint('best_model.keras', monitor='val_loss',
                        save_best_only=True)
    ]

    # No targets are passed separately, so train_data has to be an iterator
    # that already yields (inputs, targets) batches. Keras documents that
    # batch_size must not be specified for such inputs, hence it is omitted.
    history = model.fit(
        train_data,
        validation_data=val_data,
        epochs=epochs,
        callbacks=callbacks,
        verbose=2
    )

    # Plot performance. The loss and the MAE metric are different quantities
    # (the loss is whatever the model was compiled with), so each gets its
    # own, correctly labelled panel.
    curves = [('loss', 'Pérdida'), ('mae', 'MAE')]
    available = [(key, label) for key, label in curves
                 if key in history.history]

    _, axes = plt.subplots(1, len(available),
                           figsize=(8 * len(available), 5), squeeze=False)
    for ax, (key, label) in zip(axes[0], available):
        ax.plot(history.history[key], label=f'train_{key}')
        # Validation curves exist only when validation data was supplied.
        if f'val_{key}' in history.history:
            ax.plot(history.history[f'val_{key}'], label=f'val_{key}')
        ax.set_title(f'{label} durante el entrenamiento')
        ax.set_xlabel('Época')
        ax.set_ylabel(label)
        ax.legend()
    plt.tight_layout()
    plt.show()

    return history

## 5. Ejecutar

In [None]:
# Shape of one input image passed to create_model: 224x224 pixels, 3 channels.
# load_data resizes images to the same 224x224 target size.
input_shape = (224, 224, 3)
# Dataset root; eda() and load_data() read labels.csv and final_files/ from it.
path = '/datasets/faces'

# Pipeline: explore the data, build the data flows, create the model, train it.
eda(path)
# NOTE: load_data returns (training flow, validation flow), so `test` holds
# the validation subset rather than an independent test set.
train, test = load_data(path)
model = create_model(input_shape)
history = train_model(model, train, test)