In [None]:
import random
import cv2
import matplotlib.pyplot as plt
import os
import shutil
import glob
import yaml
import mlflow
from ultralytics import YOLO
from mlflow.tracking import MlflowClient
import numpy as np
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report

  from .autonotebook import tqdm as notebook_tqdm


## Environment Configuration

In [2]:
import os
import shutil
import glob
import yaml
import mlflow
from ultralytics import YOLO
from mlflow.tracking import MlflowClient

# 1. PATH AND ENVIRONMENT CONFIGURATION
# The project root (BASE_DIR) is the parent of the current working directory.
NOTEBOOK_DIR = os.getcwd()
BASE_DIR = os.path.abspath(os.path.join(NOTEBOOK_DIR, ".."))

# Data paths
RAW_DATA_DIR = os.path.join(BASE_DIR, "data", "base_dataset", "Living-Room-9")
PROCESSED_DATA_DIR = os.path.join(BASE_DIR, "data", "processed_dataset")
MODELS_DIR = os.path.join(BASE_DIR, "models")
MODELS_HISTORY = os.path.join(BASE_DIR, "models_history")

# MLflow storage: SQLite tracking DB plus a local artifact folder
MLFLOW_DB_PATH = os.path.join(BASE_DIR, "mlflow.db")
MLFLOW_ARTIFACTS_PATH = os.path.join(BASE_DIR, "mlruns")

REGISTERED_MODEL_NAME = "Furniture_Model_YOLO"

# Sanity check: warn (without aborting) when the raw dataset folder is missing
if not os.path.exists(RAW_DATA_DIR):
    print(f"\nALERTA: No se encontró la carpeta RAW en:\n{RAW_DATA_DIR}")

# TRANSLATION MAP: class id in the raw labels -> contiguous id used for training
ID_MAPPING = {
    12: 0, # Sofa
    11: 1, # Rug
    19: 2  # Pillows
}
CLASSES_NAMES = {0: 'Sofa', 1: 'Rug', 2: 'Pillows'}

# Create the required directories
for d in [PROCESSED_DATA_DIR, MODELS_DIR, MODELS_HISTORY, MLFLOW_ARTIFACTS_PATH]:
    os.makedirs(d, exist_ok=True)

# MLflow configuration
# 1. Connect to the tracking database (forward slashes keep the URI valid on Windows)
db_uri = f"sqlite:///{MLFLOW_DB_PATH.replace(os.sep, '/')}"
mlflow.set_tracking_uri(db_uri)

# 2. Configure the experiment.
# Only create it when it does not exist yet. An explicit lookup replaces the
# previous bare `except: pass`, which also hid genuine failures (unreachable
# database, bad artifact location, keyboard interrupts, ...).
experiment_name = "Furniture_Detection_System"
if mlflow.get_experiment_by_name(experiment_name) is None:
    mlflow.create_experiment(
        name=experiment_name,
        artifact_location=f"file:///{MLFLOW_ARTIFACTS_PATH.replace(os.sep, '/')}"
    )

mlflow.set_experiment(experiment_name)

  from .autonotebook import tqdm as notebook_tqdm
2026/02/03 23:54:04 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.schemas
2026/02/03 23:54:04 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.tables
2026/02/03 23:54:04 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.types
2026/02/03 23:54:04 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.constraints
2026/02/03 23:54:04 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.defaults
2026/02/03 23:54:04 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.comments
2026/02/03 23:54:05 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2026/02/03 23:54:05 INFO alembic.runtime.migration: Will assume non-transactional DDL.


<Experiment: artifact_location=('file:///c:/Users/andre/OneDrive/Documents/UPS/Inteligencia '
 'Artificial/YOLOv8_living-room_furniture/mlruns'), creation_time=1770106818785, experiment_id='1', last_update_time=1770106818785, lifecycle_stage='active', name='Furniture_Detection_System', tags={'mlflow.experimentKind': 'custom_model_development'}>

In [3]:
def sanitize_dataset(raw_dir=None, processed_dir=None, id_mapping=None, splits=("train", "valid")):
    """
    Rebuild the processed dataset from the raw YOLO-format export.

    For every split:
    1. Read the original label file that matches each image.
    2. Keep only the annotations whose class id is a key of ``id_mapping``.
    3. Rewrite the class id with its mapped value.
    4. Copy the image (and write the filtered label file) only when at least
       one annotation survived the filter.

    Any existing ``processed_dir`` is deleted first, and the folder is always
    recreated, even when no raw split is found, so later steps can write into it.

    Args:
        raw_dir: Root of the raw dataset (``<split>/images`` and
            ``<split>/labels`` inside). Defaults to ``RAW_DATA_DIR``.
        processed_dir: Destination root. Defaults to ``PROCESSED_DATA_DIR``.
        id_mapping: Dict ``{original_class_id: new_class_id}``. Defaults to
            ``ID_MAPPING``.
        splits: Names of the split folders to process.

    Returns:
        None. Progress is reported with ``print``.
    """
    # Resolve defaults at call time so the module-level configuration is honoured
    raw_dir = RAW_DATA_DIR if raw_dir is None else raw_dir
    processed_dir = PROCESSED_DATA_DIR if processed_dir is None else processed_dir
    id_mapping = ID_MAPPING if id_mapping is None else id_mapping

    print(" Limpiando y reconstruyendo dataset")

    # Remove the previous version to guarantee a clean rebuild, then recreate
    # the root so it exists even if every split below is skipped.
    if os.path.exists(processed_dir):
        shutil.rmtree(processed_dir)
    os.makedirs(processed_dir, exist_ok=True)

    for split in splits:
        src_imgs_dir = os.path.join(raw_dir, split, "images")
        if not os.path.exists(src_imgs_dir):
            # Report the skip instead of silently producing an empty dataset
            print(f"{split}: carpeta de imágenes no encontrada, se omite ({src_imgs_dir})")
            continue

        # Create destination folders
        dst_imgs_dir = os.path.join(processed_dir, split, "images")
        dst_lbls_dir = os.path.join(processed_dir, split, "labels")
        os.makedirs(dst_imgs_dir, exist_ok=True)
        os.makedirs(dst_lbls_dir, exist_ok=True)

        count = 0
        for img_path in glob.glob(os.path.join(src_imgs_dir, "*")):
            # File name without extension; the label file shares it
            basename = os.path.splitext(os.path.basename(img_path))[0]
            label_src = os.path.join(raw_dir, split, "labels", basename + ".txt")
            if not os.path.exists(label_src):
                continue

            new_lines = []
            with open(label_src, 'r') as f:
                for line in f:
                    parts = line.split()
                    if not parts:
                        continue
                    try:
                        class_id = int(parts[0])
                    except ValueError:
                        # Malformed annotation line: skip it rather than abort the rebuild
                        continue
                    # Keep (and remap) only the classes of interest
                    if class_id in id_mapping:
                        new_lines.append(f"{id_mapping[class_id]} {' '.join(parts[1:])}\n")

            # Only keep images that contain at least one object of interest
            if new_lines:
                shutil.copy(img_path, dst_imgs_dir)
                with open(os.path.join(dst_lbls_dir, basename + ".txt"), 'w') as f:
                    f.writelines(new_lines)
                count += 1
        print(f"{split}: {count} imágenes procesadas.")

# Rebuild the processed dataset (any previous processed copy is deleted first)
sanitize_dataset()

 Limpiando y reconstruyendo dataset
train: 3530 imágenes procesadas.
valid: 149 imágenes procesadas.


In [3]:
# Create the dataset YAML file used for training
yaml_path = os.path.join(PROCESSED_DATA_DIR, "data_clean.yaml")
yaml_content = {
    'path': PROCESSED_DATA_DIR.replace('\\', '/'),
    'train': 'train/images',
    'val': 'valid/images',
    # Derived from the class map so the count can never drift from the names
    'nc': len(CLASSES_NAMES),
    'names': CLASSES_NAMES
}

# The processed folder may be missing (sanitize_dataset deletes it and only
# recreates split sub-folders when raw data is found), so make sure it exists.
os.makedirs(PROCESSED_DATA_DIR, exist_ok=True)

with open(yaml_path, 'w') as f:
    yaml.dump(yaml_content, f, sort_keys=False)

## Base Training

In [4]:
import random
import os
import shutil
import glob
import yaml
import mlflow
from ultralytics import YOLO
from mlflow.tracking import MlflowClient

def train_starved_base_model(num_images=50, epochs=5, seed=None):
    """
    Train a deliberately under-trained baseline YOLO model and register it in MLflow.

    Steps:
    1. Build a temporary "sabotage" dataset under ``BASE_DIR/data/sabotage_dataset``
       with at most ``num_images`` randomly chosen training images (and labels)
       taken from ``PROCESSED_DATA_DIR``.
    2. Write a dataset YAML that trains on that subset but validates on the
       full processed validation split.
    3. Train ``yolov8n.pt`` for ``epochs`` epochs inside an MLflow run.
    4. Copy ``best.pt`` to ``MODELS_DIR``, log metrics and weights to the run
       and create a new version of ``REGISTERED_MODEL_NAME``.

    Args:
        num_images: Maximum number of training images to keep (default 50).
        epochs: Number of training epochs (default 5).
        seed: Optional seed for the image selection. ``None`` keeps the
            original behaviour of sampling with the global ``random`` state.

    Returns:
        None. Returns early (after printing an error) when the processed
        training split contains no ``.jpg`` images.
    """
    # Single folder name shared by the training output and the weights lookup
    EXPERIMENT_FOLDER = "base_model"

    # 1. Create the temporary dataset folders (starting from a clean slate)
    sabotage_dir = os.path.join(BASE_DIR, "data", "sabotage_dataset")
    sabotage_yaml = os.path.join(sabotage_dir, "sabotage.yaml")
    sabotage_imgs_dir = os.path.join(sabotage_dir, "train", "images")
    sabotage_lbls_dir = os.path.join(sabotage_dir, "train", "labels")

    if os.path.exists(sabotage_dir):
        shutil.rmtree(sabotage_dir)

    os.makedirs(sabotage_imgs_dir, exist_ok=True)
    os.makedirs(sabotage_lbls_dir, exist_ok=True)

    # 2. Pick the random subset. Sorting makes the candidate order independent
    # of the filesystem, so a fixed seed always yields the same subset.
    all_train_imgs = sorted(glob.glob(os.path.join(PROCESSED_DATA_DIR, "train", "images", "*.jpg")))

    # Guard against an empty processed dataset
    if not all_train_imgs:
        print("Error: No hay imágenes en processed_dataset/train")
        return

    rng = random if seed is None else random.Random(seed)
    selected_imgs = rng.sample(all_train_imgs, min(num_images, len(all_train_imgs)))

    print(f" Recortando dataset: De {len(all_train_imgs)} a solo {len(selected_imgs)} imágenes.")

    for img_path in selected_imgs:
        basename = os.path.basename(img_path)
        # Same naming rule as sanitize_dataset: strip only the final extension
        label_name = os.path.splitext(basename)[0] + ".txt"
        lbl_path = os.path.join(PROCESSED_DATA_DIR, "train", "labels", label_name)

        if os.path.exists(lbl_path):
            shutil.copy(img_path, os.path.join(sabotage_imgs_dir, basename))
            shutil.copy(lbl_path, os.path.join(sabotage_lbls_dir, label_name))

    # 3. Dataset YAML: reduced training set, full validation set
    original_valid_dir = os.path.join(PROCESSED_DATA_DIR, "valid", "images")

    data_config = {
        'path': '',
        'train': sabotage_imgs_dir,
        'val': original_valid_dir,
        'nc': 3,
        'names': CLASSES_NAMES
    }

    with open(sabotage_yaml, 'w') as f:
        yaml.dump(data_config, f)

    # 4. Train
    model = YOLO("yolov8n.pt")

    # Close any run left open by a previous (possibly interrupted) cell
    if mlflow.active_run():
        mlflow.end_run()

    client = MlflowClient()

    with mlflow.start_run(run_name="Base_Model") as run:
        run_id = run.info.run_id

        results = model.train(
            data=sabotage_yaml,
            epochs=epochs,
            imgsz=640,
            batch=8,
            project=MODELS_HISTORY,
            name=EXPERIMENT_FOLDER,
            exist_ok=True,
            plots=False
        )

        # NOTE(review): the Ultralytics MLflow callback appears to switch the
        # active experiment to one named after `project` (the recorded output
        # shows an experiment named after the models_history path being
        # created) and may end the active run when training finishes; confirm
        # against the installed version. Restore the experiment this run
        # belongs to so later runs are not filed under the wrong one.
        mlflow.set_experiment(experiment_id=run.info.experiment_id)

        metrics = results.box
        print(f"\nmAP50 del Modelo base: {metrics.map50:.4f}")

        # Publish the trained weights as the official best.pt
        src_weights = os.path.join(MODELS_HISTORY, EXPERIMENT_FOLDER, "weights", "best.pt")
        dst_weights = os.path.join(MODELS_DIR, "best.pt")

        if os.path.exists(src_weights):
            shutil.copy(src_weights, dst_weights)
            print(f"Modelo guardado en: {dst_weights}")

            # Log through the client with an explicit run id: this targets
            # the intended run even if it is no longer the active one.
            client.log_metric(run_id, "map50", metrics.map50)
            client.log_metric(run_id, "map50-95", metrics.map)
            client.log_artifact(run_id, dst_weights, artifact_path="weights")

            try:
                client.create_registered_model(REGISTERED_MODEL_NAME)
            except mlflow.exceptions.MlflowException:
                # Typically "already exists" on re-runs; any real registry
                # problem will surface in create_model_version below.
                pass

            client.create_model_version(
                name=REGISTERED_MODEL_NAME,
                source=f"runs:/{run_id}/weights",
                run_id=run_id
            )
        else:
            print(f"Error: No se encontró el archivo en {src_weights}")

# Train the reduced-data baseline and register the resulting weights in MLflow
train_starved_base_model()

 Recortando dataset: De 3530 a solo 50 imágenes.
New https://pypi.org/project/ultralytics/8.4.10 available  Update with 'pip install -U ultralytics'
Ultralytics 8.4.8  Python-3.11.9 torch-2.7.1+cu118 CUDA:0 (NVIDIA GeForce RTX 3050 Laptop GPU, 4096MiB)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, angle=1.0, augment=False, auto_augment=randaugment, batch=8, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=c:\Users\andre\OneDrive\Documents\UPS\Inteligencia Artificial\YOLOv8_living-room_furniture\data\sabotage_dataset\sabotage.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, end2end=None, epochs=5, erasing=0.4, exist_ok=True, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, ko

2026/02/03 03:21:13 INFO mlflow.tracking.fluent: Experiment with name 'c:\Users\andre\OneDrive\Documents\UPS\Inteligencia Artificial\YOLOv8_living-room_furniture\models_history' does not exist. Creating a new experiment.


[34m[1mMLflow: [0mlogging run_id(eb960f60c81448d199cb8ea6b0b2de2e) to sqlite:///c:/Users/andre/OneDrive/Documents/UPS/Inteligencia Artificial/YOLOv8_living-room_furniture/mlflow.db
[34m[1mMLflow: [0mdisable with 'yolo settings mlflow=False'
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mC:\Users\andre\OneDrive\Documents\UPS\Inteligencia Artificial\YOLOv8_living-room_furniture\models_history\base_model[0m
Starting training for 5 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
[K        1/5      1.12G      1.334      3.297      1.536          9        640: 100% ━━━━━━━━━━━━ 7/7 1.8it/s 3.9s0.3s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 10/10 5.1it/s 2.0s.2s
                   all        149        735    0.00741       0.64      0.226      0.149

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
[K        2/5  

## Conclusions

We successfully reduced the dataset to the three classes of interest (Sofa, Rug and Pillows), discarding every image that did not contain at least one of them. Additionally, because the YOLO model performs very well on this task out of the box, we deliberately handicapped ("sabotaged") its training by using only 50 images and training for only 5 epochs. This produced a baseline with a mAP50 of 44%, which leaves room for future experiments to show measurable improvement and learning.