In [1]:
# %%
import os
import cv2
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report
import joblib


In [2]:
# %%
WIN_SIZE = (64, 128)
BLOCK_SIZE = (16, 16)
BLOCK_STRIDE = (8, 8)
CELL_SIZE = (8, 8)
NBINS = 9

hog = cv2.HOGDescriptor(
    _winSize=WIN_SIZE,
    _blockSize=BLOCK_SIZE,
    _blockStride=BLOCK_STRIDE,
    _cellSize=CELL_SIZE,
    _nbins=NBINS
)

def extraer_hog(img):
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img_resized = cv2.resize(img_gray, WIN_SIZE)
    return hog.compute(img_resized).flatten()


In [3]:
# %%
PATH_INRIA = "dataset/pedestrian"
PATH_TIKTOK = "dataset/poseHuman"
PATH_NEG = "dataset/no_pedestrians"


In [4]:
X_pos, y_pos = [], []

def cargar_positivos(path, augment=False):
    for img_name in tqdm(os.listdir(path)):
        img = cv2.imread(os.path.join(path, img_name))
        if img is None:
            continue
        X_pos.append(extraer_hog(img))
        y_pos.append(1)
        
        # Solo aplicamos espejo si se pide (para TikTok)
        if augment:
            img_flip = cv2.flip(img, 1)
            X_pos.append(extraer_hog(img_flip))
            y_pos.append(1)

print("Cargando positivos INRIA (Sin espejo)...")
cargar_positivos(PATH_INRIA, augment=False)

print("Cargando positivos TikTok (Con espejo)...")
cargar_positivos(PATH_TIKTOK, augment=True)

X_pos = np.array(X_pos)
y_pos = np.array(y_pos)

print(f"Total positivos: {len(X_pos)}")

Cargando positivos INRIA (Sin espejo)...


100%|██████████| 1269/1269 [00:01<00:00, 795.21it/s] 


Cargando positivos TikTok (Con espejo)...


100%|██████████| 227/227 [00:02<00:00, 85.50it/s]

Total positivos: 1723





In [5]:
# %%
neg_files = os.listdir(PATH_NEG)

neg_train_files, neg_test_files = train_test_split(
    neg_files,
    test_size=0.2,
    random_state=42
)


In [6]:
# %%
X_neg_train, y_neg_train = [], []
X_neg_test, y_neg_test = [], []

def generar_negativos(files, X_out, y_out, crops_per_img):
    for img_name in tqdm(files):
        img = cv2.imread(os.path.join(PATH_NEG, img_name))
        if img is None:
            continue

        h, w = img.shape[:2]
        if h < 128 or w < 64:
            continue

        for _ in range(crops_per_img):
            ry = np.random.randint(0, h - 128)
            rx = np.random.randint(0, w - 64)
            roi = img[ry:ry+128, rx:rx+64]
            X_out.append(extraer_hog(roi))
            y_out.append(0)

# calculamos cuántos crops por imagen para lograr ~2:1
target_neg = len(X_pos) * 2
crops_per_img = max(1, target_neg // len(neg_train_files))

print(f"Generando negativos (crops por imagen: {crops_per_img})")

generar_negativos(neg_train_files, X_neg_train, y_neg_train, crops_per_img)
generar_negativos(neg_test_files,  X_neg_test,  y_neg_test,  crops_per_img)

X_neg_train = np.array(X_neg_train)
y_neg_train = np.array(y_neg_train)
X_neg_test = np.array(X_neg_test)
y_neg_test = np.array(y_neg_test)

print(f"Negativos train: {len(X_neg_train)}")
print(f"Negativos test : {len(X_neg_test)}")


Generando negativos (crops por imagen: 2)


100%|██████████| 1427/1427 [00:06<00:00, 223.65it/s]
100%|██████████| 357/357 [00:01<00:00, 257.31it/s]


Negativos train: 2836
Negativos test : 712


In [7]:
# %%
X_pos_train, X_pos_test, y_pos_train, y_pos_test = train_test_split(X_pos, y_pos, test_size=0.2, random_state=42)

X_train = np.vstack([X_pos_train, X_neg_train])
y_train = np.concatenate([y_pos_train, y_neg_train])

X_test = np.vstack([X_pos_test, X_neg_test])
y_test = np.concatenate([y_pos_test, y_neg_test])


In [8]:
# %%
svm = LinearSVC(
    C=0.01,
    max_iter=5000,
    class_weight="balanced"
)

print("Entrenando SVM inicial...")
svm.fit(X_train, y_train)

print("Evaluación inicial:")
print(classification_report(y_test, svm.predict(X_test)))


Entrenando SVM inicial...
Evaluación inicial:
              precision    recall  f1-score   support

           0       0.96      0.95      0.96       712
           1       0.90      0.92      0.91       345

    accuracy                           0.94      1057
   macro avg       0.93      0.94      0.93      1057
weighted avg       0.94      0.94      0.94      1057



In [9]:
# %%
svm_weights = np.hstack([svm.coef_.ravel(), svm.intercept_])
hog.setSVMDetector(svm_weights)


In [10]:
# %%
def hard_negative_mining(hog_detector, files, max_per_img=5):
    hard_feats = []

    print("Buscando hard negatives...")
    for img_name in tqdm(files):
        img = cv2.imread(os.path.join(PATH_NEG, img_name))
        if img is None:
            continue

        rects, _ = hog_detector.detectMultiScale(
            img,
            hitThreshold=0.0,
            winStride=(8, 8),
            scale=1.05
        )

        for (x, y, w, h) in rects[:max_per_img]:
            roi = img[y:y+h, x:x+w]
            if roi.shape[0] >= 128 and roi.shape[1] >= 64:
                hard_feats.append(extraer_hog(roi))

    return np.array(hard_feats)


In [11]:

svm_iter = svm

for i in range(5):
    print(f"\n--- Ronda de Hard Mining {i+1}/5 ---")
    
    # IMPORTANTE: Asegúrate de que 'hog' tenga el detector actual seteado
    svm_weights = np.hstack([svm_iter.coef_.ravel(), svm_iter.intercept_])
    hog.setSVMDetector(svm_weights)

    # Buscamos Hard Negatives con el detector actual
    X_hard = hard_negative_mining(hog, neg_train_files, max_per_img=10)
    
    if len(X_hard) == 0:
        print("No se encontraron más hard negatives. Terminando.")
        break
        
    print(f"Ronda {i+1}: Se encontraron {len(X_hard)} hard negatives nuevos.")
    
    # Agregamos los nuevos difíciles al set de entrenamiento
    X_train = np.vstack([X_train, X_hard])
    y_train = np.concatenate([y_train, np.zeros(len(X_hard))])
    
    # Reentrenamos con el conjunto extendido
    print("Reentrenando SVM...")
    svm_iter.fit(X_train, y_train)
    
    print(f"Evaluación Ronda {i+1}:")
    print(classification_report(y_test, svm_iter.predict(X_test)))

# Actualizamos la variable global 'svm' al final
svm = svm_iter
print("\nEntrenamiento final completado.")


--- Ronda de Hard Mining 1/5 ---
Buscando hard negatives...


100%|██████████| 1427/1427 [02:38<00:00,  9.01it/s]


Ronda 1: Se encontraron 9559 hard negatives nuevos.
Reentrenando SVM...
Evaluación Ronda 1:
              precision    recall  f1-score   support

           0       0.92      0.99      0.95       712
           1       0.97      0.83      0.89       345

    accuracy                           0.94      1057
   macro avg       0.95      0.91      0.92      1057
weighted avg       0.94      0.94      0.93      1057


--- Ronda de Hard Mining 2/5 ---
Buscando hard negatives...


100%|██████████| 1427/1427 [01:03<00:00, 22.54it/s]


Ronda 2: Se encontraron 6360 hard negatives nuevos.
Reentrenando SVM...
Evaluación Ronda 2:
              precision    recall  f1-score   support

           0       0.91      1.00      0.95       712
           1       0.99      0.79      0.88       345

    accuracy                           0.93      1057
   macro avg       0.95      0.89      0.91      1057
weighted avg       0.93      0.93      0.93      1057


--- Ronda de Hard Mining 3/5 ---
Buscando hard negatives...


100%|██████████| 1427/1427 [00:51<00:00, 27.80it/s]


Ronda 3: Se encontraron 4684 hard negatives nuevos.
Reentrenando SVM...
Evaluación Ronda 3:
              precision    recall  f1-score   support

           0       0.89      1.00      0.94       712
           1       0.99      0.75      0.85       345

    accuracy                           0.92      1057
   macro avg       0.94      0.87      0.90      1057
weighted avg       0.92      0.92      0.91      1057


--- Ronda de Hard Mining 4/5 ---
Buscando hard negatives...


100%|██████████| 1427/1427 [00:46<00:00, 30.90it/s]


Ronda 4: Se encontraron 3550 hard negatives nuevos.
Reentrenando SVM...
Evaluación Ronda 4:
              precision    recall  f1-score   support

           0       0.88      1.00      0.94       712
           1       0.99      0.73      0.84       345

    accuracy                           0.91      1057
   macro avg       0.94      0.86      0.89      1057
weighted avg       0.92      0.91      0.90      1057


--- Ronda de Hard Mining 5/5 ---
Buscando hard negatives...


100%|██████████| 1427/1427 [00:51<00:00, 27.82it/s]


Ronda 5: Se encontraron 2908 hard negatives nuevos.
Reentrenando SVM...
Evaluación Ronda 5:
              precision    recall  f1-score   support

           0       0.88      1.00      0.93       712
           1       1.00      0.71      0.83       345

    accuracy                           0.91      1057
   macro avg       0.94      0.86      0.88      1057
weighted avg       0.92      0.91      0.90      1057


Entrenamiento final completado.


In [12]:
# %%
joblib.dump(svm, "hog_svm_final.pkl")

final_weights = np.hstack([svm.coef_.ravel(), svm.intercept_])
np.save("hog_svm_weights.npy", final_weights)

print("Modelo y pesos guardados.")


Modelo y pesos guardados.


### Probar


In [24]:
import cv2
import os
import numpy as np
import joblib

# Parche Manjaro / Wayland
os.environ["QT_QPA_PLATFORM"] = "xcb"

def probar_video_sensible_custom(path_video, modelo_path):
    # ===============================
    # 1. Configurar HOG (IGUAL al entrenamiento)
    # ===============================
    WIN_SIZE = (64, 128)
    BLOCK_SIZE = (16, 16)
    BLOCK_STRIDE = (8, 8)
    CELL_SIZE = (8, 8)
    NBINS = 9

    hog = cv2.HOGDescriptor(
        _winSize=WIN_SIZE,
        _blockSize=BLOCK_SIZE,
        _blockStride=BLOCK_STRIDE,
        _cellSize=CELL_SIZE,
        _nbins=NBINS
    )

    # ===============================
    # 2. Cargar modelo entrenado
    # ===============================
    svm = joblib.load(modelo_path)

    svm_weights = np.hstack([
        svm.coef_.ravel(),
        svm.intercept_
    ])

    hog.setSVMDetector(svm_weights)
    print("✔ Detector HOG-SVM personalizado cargado")

    # ===============================
    # 3. Video o cámara
    # ===============================
    cap = cv2.VideoCapture(path_video)  # usa 0 para webcam

    if not cap.isOpened():
        print(f"❌ No se pudo abrir: {path_video}")
        return

    print("Procesando... Presiona 'q' para salir.")

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # ===============================
            # 4. Detección sensible
            # ===============================
            rects, weights = hog.detectMultiScale(
                frame,
                hitThreshold=0.1,     # prueba 0.3 – 0.6
                winStride=(8, 8),     # sensible a movimiento
                padding=(8, 8),
                scale=1.05
            )

            if len(rects) > 0:
                boxes = rects.tolist()
                scores = weights.flatten().tolist()

                # ===============================
                # 5. Non-Maximum Suppression
                # ===============================
                indices = cv2.dnn.NMSBoxes(
                    boxes,
                    scores,
                    score_threshold=0.4,
                    nms_threshold=0.4
                )

                if len(indices) > 0:
                    for i in indices.flatten():
                        if scores[i] > 0.6:
                            x, y, w, h = boxes[i]
                            cv2.rectangle(
                                frame,
                                (x, y),
                                (x + w, y + h),
                                (0, 255, 0),
                                2
                            )
                            cv2.putText(
                                frame,
                                f"Persona {scores[i]:.2f}",
                                (x, y - 10),
                                cv2.FONT_HERSHEY_SIMPLEX,
                                0.5,
                                (0, 255, 0),
                                2
                            )

            cv2.imshow("HOG Person Detector (Custom)", frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    except Exception as e:
        print(f"⚠ Error: {e}")

    finally:
        cap.release()
        cv2.destroyAllWindows()
        for _ in range(10):
            cv2.waitKey(1)
        print("✔ Video liberado correctamente")


In [None]:
probar_video_sensible_custom(
    2,              # o 0 para webcam
    "hog_svm_final.pkl"    # tu modelo entrenado
)


# probar_video_sensible_custom(
#     "b4.mp4",              # o 0 para webcam
#     "hog_svm_final.pkl"    # tu modelo entrenado
# )


✔ Detector HOG-SVM personalizado cargado
Procesando... Presiona 'q' para salir.
✔ Video liberado correctamente


### Default detector


In [15]:
# import cv2

# # Forzar X11 por si las moscas en Manjaro
# import os
# os.environ["QT_QPA_PLATFORM"] = "xcb"

# def prueba_ultra_sensible(path_img):
#     hog = cv2.HOGDescriptor()
#     hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
    
#     img = cv2.imread(path_img)
#     if img is None:
#         print("Ni siquiera cargó la imagen, Karen. Revisa el path.")
#         return

#     # 1. Bajamos la escala a 1.01 (es lento pero busca 'pixel por pixel')
#     # 2. Quitamos el hitThreshold (0 es el default, muy sensible)
#     rects, weights = hog.detectMultiScale(img, 
#                                           winStride=(4, 4), # Paso más pequeño
#                                           padding=(8, 8), 
#                                           scale=1.01) # Escala muy fina

#     print(f"Detecciones crudas encontradas: {len(rects)}")

#     # DIBUJAMOS TODO sin filtrar con groupRectangles para ver qué está pasando
#     for (x, y, w, h) in rects:
#         cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 1) # Rojo = Crudo

#     cv2.imshow('Detecciones sin filtro (Rojo)', img)
#     cv2.waitKey(0)
#     cv2.destroyAllWindows()
#     for i in range(5): cv2.waitKey(1)

# # Prueba con esa foto del INRIA que te falló
# # prueba_ultra_sensible('ruta_de_la_traicionera.jpg')
# prueba_ultra_sensible('crop001008a.png')

In [16]:
# import cv2
# import os
# import numpy as np

# # Parche para Manjaro/Wayland
# os.environ["QT_QPA_PLATFORM"] = "xcb"

# def probar_video_sensible(path_video):
#     # 1. Configurar detector
#     hog = cv2.HOGDescriptor()
#     hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
    
#     cap = cv2.VideoCapture(path_video)
#     # cap = cv2.VideoCapture(2) 

    
#     if not cap.isOpened():
#         print(f"Error: No se pudo abrir el video en {path_video}")
#         return

#     print("Procesando... Presiona 'q' para salir.")

#     try:
#         while cap.isOpened():
#             ret, frame = cap.read()
#             if not ret:
#                 break

#             # 2. Detección Sensible Cruda
#             # hitThreshold: 0.0 es el más sensible (mucho ruido), 0.2 es un punto medio.
#             # winStride: (4,4) ayuda a que no se le "escape" la persona entre saltos.
#             rects, weights = hog.detectMultiScale(frame, 
#                                                   hitThreshold=0.3, # Ajusta entre 0.0 y 0.3
#                                                   winStride=(4, 4), 
#                                                   padding=(8, 8), 
#                                                   scale=1.03)
            
#             # ... después de rects, weights = hog.detectMultiScale(...)

#             if len(rects) > 0:
#                 # Convertimos a listas para que NMSBoxes las acepte
#                 boxes = rects.tolist()
#                 scores = weights.flatten().tolist()
                
#                 # score_threshold: ignora detecciones muy débiles (sube este si hay mucha basura)
#                 # nms_threshold: qué tanto se deben encimar los cuadros para unirse (0.3 es estándar)
#                 indices = cv2.dnn.NMSBoxes(boxes, scores, score_threshold=0.3, nms_threshold=0.4)
                
#                 # Solo dibujamos los cuadros que pasaron el filtro NMS
#                 if len(indices) > 0:
#                     for i in indices.flatten():
#                         if scores[i] > 0.8:
#                             x, y, w, h = boxes[i]
#                             cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
#                             cv2.putText(frame, f"Persona {scores[i]:.2f}", (x, y-10), 
#                                         cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
            

#             # 3. Dibujar detecciones crudas
#             # Verás muchos cuadros sobre la misma persona; eso indica que el detector está SEGURO.
#             # for (x, y, w, h) in rects:
#             #     cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 1)
#             #     # Opcional: poner el peso/confianza para depurar
#             #     cv2.putText(frame, "Ok", (x, y-5), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 255, 0), 1)

#             cv2.imshow('HOG Crudo (Sin NMS)', frame)

#             # Control de salida y refresco de ventana
#             if cv2.waitKey(1) & 0xFF == ord('q'):
#                 break
                
#     except Exception as e:
#         print(f"Sucedió un error: {e}")
#     finally:
#         # 4. Limpieza total para evitar ventanas colgadas en Manjaro
#         cap.release()
#         cv2.destroyAllWindows()
#         for i in range(10): cv2.waitKey(1)
#         print("Cámara/Video liberado.")

# # USO
# probar_video_sensible('b4.mp4')