In [1]:
import os
import numpy as np
from tensorflow.keras.preprocessing import image
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report

In [2]:
# Ruta al dataset
DATASET_DIR = "/Users/damian/Universidad/redes_neuronales/reto/smiles"
# comprobar la ruta
print("DATASET_DIR existe?", os.path.exists(DATASET_DIR))

DATASET_DIR existe? True


In [3]:
#   FUNCIÓN REUTILIZABLE — CARGA IMÁGENES Y HACE EL SPLIT
def cargar_y_dividir_dataset(
    rootpath=DATASET_DIR,
    target_size=(64, 64),
    test_size=0.2,
    random_state=1234
):
    smile_dir = os.path.join(rootpath, "smile")
    not_smile_dir = os.path.join(rootpath, "not_smile")

    X = []
    y = []

    # Cargar clase: SMILE = 1
    for fname in os.listdir(smile_dir):
        if not fname.lower().endswith((".jpg", ".jpeg", ".png", ".ppm")):
            continue

        path = os.path.join(smile_dir, fname)
        img = image.load_img(
            path, target_size=target_size, color_mode="rgb"
        )
        arr = image.img_to_array(img) / 255.0
        X.append(arr)
        y.append(1)

    # Cargar clase: NON SMILE = 0
    for fname in os.listdir(not_smile_dir):
        if not fname.lower().endswith((".jpg", ".jpeg", ".png", ".ppm")):
            continue

        path = os.path.join(not_smile_dir, fname)
        img = image.load_img(
            path, target_size=target_size, color_mode="rgb"
        )
        arr = image.img_to_array(img) / 255.0
        X.append(arr)
        y.append(0)

    X = np.array(X)
    y = np.array(y)

    print("Total de imágenes cargadas:", X.shape[0])
    print("Dimensión de imagen:", X.shape[1:])

    X_train, X_test, y_train, y_test = train_test_split(
        X, y,
        test_size=test_size,
        stratify=y,
        random_state=random_state
    )

    X_train_flat = X_train.reshape(X_train.shape[0], -1)
    X_test_flat  = X_test.reshape(X_test.shape[0], -1)

    return (
        X_train, X_test,
        y_train, y_test,
        X_train_flat, X_test_flat
    )

In [4]:
from PIL import Image

In [5]:
# Random Forest
if __name__ == "__main__":
    (
        X_train, X_test,
        y_train, y_test,
        X_train_flat, X_test_flat
    ) = cargar_y_dividir_dataset(
        target_size=(64, 64)   # mismo tamaño que usarás en la CNN
    )

    print("Tamaño train:", X_train_flat.shape)
    print("Tamaño test:", X_test_flat.shape)

    rf = RandomForestClassifier(
        n_estimators=300,
        max_depth=None,
        random_state=1234,
        n_jobs=-1
    )

    print("\nEntrenando Random Forest...")
    rf.fit(X_train_flat, y_train)

    print("\nEvaluación Random Forest:")
    y_pred = rf.predict(X_test_flat)

    print("\nMatriz de confusión:")
    print(confusion_matrix(y_test, y_pred))

    print("\nReporte de clasificación:")
    print(
        classification_report(
            y_test,
            y_pred,
            target_names=["no_smile", "smile"]
        )
    )

Total de imágenes cargadas: 1203
Dimensión de imagen: (64, 64, 3)
Tamaño train: (962, 12288)
Tamaño test: (241, 12288)

Entrenando Random Forest...

Evaluación Random Forest:

Matriz de confusión:
[[111  10]
 [ 18 102]]

Reporte de clasificación:
              precision    recall  f1-score   support

    no_smile       0.86      0.92      0.89       121
       smile       0.91      0.85      0.88       120

    accuracy                           0.88       241
   macro avg       0.89      0.88      0.88       241
weighted avg       0.89      0.88      0.88       241

