In [1]:
from roboflow import Roboflow
import supervision as sv
import cv2
import numpy as np
import os




In [2]:
def read_yolo_labels(label_path, img_shape):
    """Read YOLO labels (x_center, y_center, width, height) normalized -> absolute xyxy."""
    H, W = img_shape[:2]
    boxes = []
    classes = []
    with open(label_path, "r") as f:
        for line in f:
            # print("Label line: ", line)
            parts = list(map(float, line.strip().split()))
            cls, x, y, w, h = parts[:5]  # ignore polygons if exist
            x1 = (x - w / 2) * W
            y1 = (y - h / 2) * H
            x2 = (x + w / 2) * W
            y2 = (y + h / 2) * H
            boxes.append([x1, y1, x2, y2])
            classes.append(int(cls))
    return np.array(boxes), np.array(classes)

In [3]:
def roboflow_to_xyxy(preds):
    """Convert Roboflow predictions (x, y, width, height) to xyxy absolute boxes."""
    boxes, classes, confs = [], [], []
    for p in preds:
        x1 = p["x"] - p["width"] / 2
        y1 = p["y"] - p["height"] / 2
        x2 = p["x"] + p["width"] / 2
        y2 = p["y"] + p["height"] / 2
        boxes.append([x1, y1, x2, y2])
        classes.append(p["class_id"])
        confs.append(p["confidence"])
    return np.array(boxes), np.array(classes), np.array(confs)


In [4]:
def roboflow_pred_to_yolo(pred, img_width, img_height):
    x_center = pred["x"] / img_width
    y_center = pred["y"] / img_height
    width = pred["width"] / img_width
    height = pred["height"] / img_height
    return f"{pred['class_id']} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}"


In [5]:
from supervision.detection.utils.iou_and_nms import box_iou_batch

def compute_miou(pred_boxes, gt_boxes):
    """Compute mean IoU between predicted and ground-truth boxes."""
    if len(pred_boxes) == 0 or len(gt_boxes) == 0:
        return 0.0
    ious = box_iou_batch(pred_boxes, gt_boxes)
    return np.mean(ious.max(axis=1))

In [6]:
from supervision.metrics.mean_average_precision import MeanAveragePrecision

def calculate_map(detections_pred, detections_gt):
    """Calculate mAP (mean average precision) for object detection."""
    # Create the metric
    map_metric = MeanAveragePrecision()

    # Add your predictions and ground-truth Detections
    map_metric.update(predictions=detections_pred, targets=detections_gt)

    # Compute results
    return map_metric.compute()

In [7]:
from supervision.metrics.detection import ConfusionMatrix

def calculate_precision_recall(predictions_pred, predictions_gt, class_names, iou_threshold=0.5, conf_threshold=0.25):
    """
    Compute Precision and Recall using Supervision v0.26.1 ConfusionMatrix.
    """
    cm = ConfusionMatrix.from_detections(
        predictions=[predictions_pred],
        targets=[predictions_gt],
        classes=class_names,
        conf_threshold=conf_threshold,
        iou_threshold=iou_threshold,
    )
    M = cm.matrix
    # matrix shape = (n_classes+1, n_classes+1)
    # last row/col = background/missed detections

    num_classes = len(class_names)
    tp = np.diag(M[:num_classes, :num_classes])           # true positives per class
    fp = M[:num_classes, num_classes]                     # false positives per class
    fn = M[num_classes, :num_classes]                     # false negatives per class

    precision = tp / (tp + fp + 1e-16)
    recall = tp / (tp + fn + 1e-16)
    f1 = 2 * precision * recall / (precision + recall + 1e-16)

    # Replace NaN (if no samples) with 0
    precision = np.nan_to_num(precision)
    recall = np.nan_to_num(recall)
    f1 = np.nan_to_num(f1)

    mean_precision = precision.mean()
    mean_recall = recall.mean()
    mean_f1 = f1.mean()

    print(f"Confusion Matrix:\n{M}")
    print(f"Per-class precision: {precision}")
    print(f"Per-class recall:    {recall}")
    print(f"Per-class F1:        {f1}")
    print(f"Mean Precision: {mean_precision:.4f}, Mean Recall: {mean_recall:.4f}, Mean F1: {mean_f1:.4f}")

    return mean_precision, mean_recall, mean_f1
    return cm


In [8]:
# --- CONFIGURATION ---
API_KEY = "ooPxxzLT1xgs8CWEiMjY"
PROJECT = "container-number-pmov4"
VERSION = 4
CONFIDENCE = 40
OVERLAP = 30


BASE_DIR = "/home/emma/facultad/pps/validacion/numbers/dataset/test"
IMAGES_DIR = os.path.join(BASE_DIR, "images")
LABELS_DIR = os.path.join(BASE_DIR, "labels")
print(LABELS_DIR)
OUTPUT_DIR = f"/home/emma/facultad/pps/validacion/numbers/{PROJECT}"

os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(os.path.join(OUTPUT_DIR, "images"), exist_ok=True)


/home/emma/facultad/pps/validacion/numbers/dataset/test/labels


In [9]:
def replace_trailing_extension(string, cases, new_extension):
    for case in cases:
        if string.endswith(case):
            return string[:-len(case)] + new_extension
    return string

def predict_model():
    # --- INIT MODEL ---
    rf = Roboflow(api_key=API_KEY)
    project = rf.workspace().project(PROJECT)
    model = project.version(VERSION).model
    image_files = [f for f in os.listdir(IMAGES_DIR) if f.endswith((".jpg", ".png", ".jpeg"))]
    
    total_ious = []

    for idx, img_name in enumerate(image_files):
        image_path = os.path.join(IMAGES_DIR, img_name)
        label_path = os.path.join(LABELS_DIR, replace_trailing_extension(img_name, [".jpg", ".png", ".jpeg"], ".txt"))
        print(f"Image {idx} of {len(image_files)}: LABEL PATH {label_path}")
        image = cv2.imread(image_path)
        if image is None:
            print(f"⚠️ Skipping unreadable image: {img_name}")
            continue

        if not os.path.exists(label_path):
            print(f"⚠️ Missing label for {img_name}")
            continue

        # Predict
        result = model.predict(image_path, confidence=CONFIDENCE, overlap=OVERLAP).json()

        # Save the prediction result to OUTPUT_DIR/predictions
        predictions_dir = os.path.join(OUTPUT_DIR, "predictions")
        os.makedirs(predictions_dir, exist_ok=True)
        pred_filename = f"{os.path.splitext(img_name)[0]}.json"
        pred_out_path = os.path.join(predictions_dir, pred_filename)
        with open(pred_out_path, "w") as f:
            import json
            json.dump(result, f, indent=2)

        preds = result.get("predictions", [])

        if not preds:
            print(f"⚠️ No detections for {img_name}")
            continue

        gt_boxes, gt_classes = read_yolo_labels(label_path, image.shape)
        pred_boxes, pred_classes, pred_conf = roboflow_to_xyxy(preds)
        miou = compute_miou(pred_boxes, gt_boxes)
        total_ious.append(miou)

        # Annotate
        gt_det = sv.Detections(xyxy=gt_boxes, class_id=gt_classes)
        pred_det = sv.Detections(xyxy=pred_boxes, confidence=pred_conf, class_id=pred_classes)
        gt_annotator = sv.BoxAnnotator(color=sv.Color.GREEN) ## GREEN
        pred_annotator = sv.BoxAnnotator(color=sv.Color.RED) ## RED
        label_annotator = sv.LabelAnnotator(text_color=sv.Color.WHITE, text_position=sv.Position.TOP_LEFT)

        annotated = gt_annotator.annotate(scene=image.copy(), detections=gt_det)
        annotated = pred_annotator.annotate(scene=annotated, detections=pred_det)
        labels = [f"{p['class']} ({p['confidence']:.2f})" for p in preds]
        annotated = label_annotator.annotate(scene=annotated, detections=pred_det, labels=labels)

        out_path = os.path.join(OUTPUT_DIR,"images", f"{os.path.splitext(img_name)[0]}_annotated.jpg")
        cv2.imwrite(out_path, annotated)

    # --- SUMMARY ---
    if total_ious:
        print(f"\n✅ Processed {len(total_ious)} images.")
        print(f"📊 Mean IoU across dataset: {np.mean(total_ious):.4f}")
    else:
        print("No valid images processed.")

In [10]:
predict_model()

loading Roboflow workspace...
loading Roboflow project...
Image 0 of 855: LABEL PATH /home/emma/facultad/pps/validacion/numbers/dataset/test/labels/IMG_CON_CRANE_DOOR_A_20210831131320_313102_jpg.rf.fe75ffa7f387b6d299d99c259bbd80f1.txt
Image 1 of 855: LABEL PATH /home/emma/facultad/pps/validacion/numbers/dataset/test/labels/DFSU1039309-1-_jpg.rf.a19b8e8810fbd06ffd9523db0579d72b.txt
⚠️ No detections for DFSU1039309-1-_jpg.rf.a19b8e8810fbd06ffd9523db0579d72b.jpg
Image 2 of 855: LABEL PATH /home/emma/facultad/pps/validacion/numbers/dataset/test/labels/BSIU9171399-1-_jpg.rf.dcf7e2f43372a5d3255cfd86081d07d8.txt
Image 3 of 855: LABEL PATH /home/emma/facultad/pps/validacion/numbers/dataset/test/labels/IMG222_jpg.rf.c32bdf55e8746d890120710932035371.txt
Image 4 of 855: LABEL PATH /home/emma/facultad/pps/validacion/numbers/dataset/test/labels/BMOU4840813-3-_638x480_jpg.rf.18d06bb1176c4a78669f6232149db95d.txt
Image 5 of 855: LABEL PATH /home/emma/facultad/pps/validacion/numbers/dataset/test/labels

In [11]:
import os
import json
import cv2
import numpy as np
from tqdm import tqdm
import supervision as sv

# ---------------------------------------------------------------------
# --- EXISTING HELPER FUNCTIONS (keep exactly as you provided) ---
# ---------------------------------------------------------------------

# read_yolo_labels, roboflow_to_xyxy, roboflow_pred_to_yolo,
# compute_miou, calculate_map, calculate_precision_recall
# (we assume these are already imported exactly as you pasted them)
# ---------------------------------------------------------------------

def evaluate_predictions(predictions_dir, labels_dir, class_names):
    """
    Evaluate all Roboflow prediction JSONs against YOLO-format ground-truth labels.
    """

    pred_files = [
        f for f in os.listdir(predictions_dir)
        if f.endswith(".json")
    ]

    print(f"📂 Found {len(pred_files)} prediction files in: {predictions_dir}")
    if not pred_files:
        print("❌ No predictions found.")
        return None

    all_mious = []
    image_metrics = []

    all_pred_detections = []
    all_gt_detections = []

    processed = 0
    skipped = 0

    for pred_file in tqdm(pred_files, desc="Evaluating images"):
        base_name = os.path.splitext(pred_file)[0]
        pred_path = os.path.join(predictions_dir, pred_file)
        label_path = os.path.join(labels_dir, f"{base_name}.txt")

        if not os.path.exists(label_path):
            print(f"⚠️ Missing label for {base_name}")
            skipped += 1
            continue

        # --- Load prediction JSON ---
        with open(pred_path, "r") as f:
            pred_data = json.load(f)

        preds = pred_data.get("predictions", [])
        if not preds:
            print(f"⚠️ No predictions for {base_name}")
            skipped += 1
            continue

        # --- Infer image shape ---
        # If Roboflow JSON includes width/height, use them directly
        img_width = int(preds[0]["image"]["width"]) if "image" in preds[0] else int(preds[0]["width"])
        img_height = int(preds[0]["image"]["height"]) if "image" in preds[0] else int(preds[0]["height"])
        img_shape = (img_height, img_width, 3)

        # --- Parse labels & predictions ---
        gt_boxes, gt_classes = read_yolo_labels(label_path, img_shape)
        pred_boxes, pred_classes, pred_conf = roboflow_to_xyxy(preds)

        # --- Build supervision Detections ---
        gt_det = sv.Detections(xyxy=gt_boxes, class_id=gt_classes)
        pred_det = sv.Detections(xyxy=pred_boxes, confidence=pred_conf, class_id=pred_classes)

        # --- Per-image IoU ---
        miou = compute_miou(pred_boxes, gt_boxes)
        all_mious.append(miou)

        # --- Per-image metrics ---
        if len(pred_boxes) > 0 and len(gt_boxes) > 0:
            precision, recall, f1 = calculate_precision_recall(pred_det, gt_det, class_names)
        else:
            precision, recall, f1 = 0.0, 0.0, 0.0

        image_metrics.append({
            "image": base_name,
            "miou": miou,
            "precision": precision,
            "recall": recall,
            "f1": f1,
            "num_pred": len(pred_boxes),
            "num_gt": len(gt_boxes),
        })

        # Store detections for global metrics
        all_pred_detections.append(pred_det)
        all_gt_detections.append(gt_det)

        processed += 1

    print(f"\n📊 Processed: {processed}, Skipped: {skipped}")

    if not all_pred_detections or not all_gt_detections:
        print("❌ No valid detections for evaluation.")
        return None

    # ---------------------------------------------------------------------
    # --- Merge detections for global metrics ---
    # ---------------------------------------------------------------------
    def merge_detections(dets_list):
        """Concatenate multiple sv.Detections into one."""
        if not dets_list:
            return sv.Detections.empty()
        xyxy = np.concatenate([d.xyxy for d in dets_list if len(d.xyxy) > 0], axis=0)
        class_id = np.concatenate([d.class_id for d in dets_list if d.class_id is not None], axis=0)
        confidence = np.concatenate(
            [d.confidence for d in dets_list if d.confidence is not None], axis=0
        ) if any(d.confidence is not None for d in dets_list) else None
        return sv.Detections(xyxy=xyxy, class_id=class_id, confidence=confidence)

    merged_pred = merge_detections(all_pred_detections)
    merged_gt = merge_detections(all_gt_detections)

    # ---------------------------------------------------------------------
    # --- Global metrics ---
    # ---------------------------------------------------------------------
    map_result = calculate_map(merged_pred, merged_gt)
    overall_precision, overall_recall, overall_f1 = calculate_precision_recall(
        merged_pred, merged_gt, class_names
    )
    mean_iou = float(np.mean(all_mious)) if all_mious else 0.0

    # ---------------------------------------------------------------------
    # --- Final results dictionary ---
    # ---------------------------------------------------------------------
    results = {
        "overall_metrics": {
            "mAP@50": getattr(map_result, "map50", np.nan),
            "mAP@75": getattr(map_result, "map75", np.nan),
            "mAP@50-95": getattr(map_result, "map", np.nan),
            "precision": overall_precision,
            "recall": overall_recall,
            "f1": overall_f1,
            "mean_iou": mean_iou,
            "total_images": processed,
            "skipped_images": skipped,
        },
        "per_image_metrics": image_metrics,
        "class_names": class_names,
    }

    print("\n✅ Evaluation complete.")
    print(f"📈 Mean IoU: {mean_iou:.4f}")
    print(f"📈 Precision: {overall_precision:.4f}, Recall: {overall_recall:.4f}, F1: {overall_f1:.4f}")
    print(f"📈 mAP@50: {map_result.map50:.4f}, mAP@75: {map_result.map75:.4f}")

    return results

In [12]:
PREDICTIONS_DIR = os.path.join(OUTPUT_DIR, "predictions")
CLASS_NAMES = ["Healthy Container"]
results = evaluate_predictions(PREDICTIONS_DIR, LABELS_DIR, CLASS_NAMES)
if results:
    import pandas as pd
    df = pd.DataFrame(results["per_image_metrics"])
    df.to_csv(os.path.join(OUTPUT_DIR, "evaluation_report.csv"), index=False)
    print("💾 Saved detailed metrics to evaluation_report.csv")

📂 Found 855 prediction files in: /home/emma/facultad/pps/validacion/numbers/container-number-pmov4/predictions


Evaluating images:  18%|█▊        | 150/855 [00:00<00:00, 1489.91it/s]

Confusion Matrix:
[[0. 1.]
 [1. 1.]]
Per-class precision: [0.]
Per-class recall:    [0.]
Per-class F1:        [0.]
Mean Precision: 0.0000, Mean Recall: 0.0000, Mean F1: 0.0000
⚠️ No predictions for Maersk_container_with_Fireworks_jpg.rf.c9075d95b5edc286a91067143d6e200a
Confusion Matrix:
[[0. 2.]
 [0. 1.]]
Per-class precision: [0.]
Per-class recall:    [0.]
Per-class F1:        [0.]
Mean Precision: 0.0000, Mean Recall: 0.0000, Mean F1: 0.0000
⚠️ No predictions for 1-141817001-OCR-RF-D01_jpg.rf.f5f0035564d4bb90f4a3145ceb4af873
Confusion Matrix:
[[0. 1.]
 [1. 1.]]
Per-class precision: [0.]
Per-class recall:    [0.]
Per-class F1:        [0.]
Mean Precision: 0.0000, Mean Recall: 0.0000, Mean F1: 0.0000
Confusion Matrix:
[[0. 1.]
 [1. 1.]]
Per-class precision: [0.]
Per-class recall:    [0.]
Per-class F1:        [0.]
Mean Precision: 0.0000, Mean Recall: 0.0000, Mean F1: 0.0000
⚠️ No predictions for 8ft-standard_jpg.rf.77fc436ba4d919cabc9197bc0a570f76
Confusion Matrix:
[[0. 1.]
 [1. 1.]]
Per-c

Evaluating images:  52%|█████▏    | 446/855 [00:00<00:00, 1438.98it/s]

Confusion Matrix:
[[0. 1.]
 [1. 1.]]
Per-class precision: [0.]
Per-class recall:    [0.]
Per-class F1:        [0.]
Mean Precision: 0.0000, Mean Recall: 0.0000, Mean F1: 0.0000
Confusion Matrix:
[[0. 1.]
 [1. 1.]]
Per-class precision: [0.]
Per-class recall:    [0.]
Per-class F1:        [0.]
Mean Precision: 0.0000, Mean Recall: 0.0000, Mean F1: 0.0000
Confusion Matrix:
[[0. 2.]
 [1. 1.]]
Per-class precision: [0.]
Per-class recall:    [0.]
Per-class F1:        [0.]
Mean Precision: 0.0000, Mean Recall: 0.0000, Mean F1: 0.0000
Confusion Matrix:
[[0. 1.]
 [0. 1.]]
Per-class precision: [0.]
Per-class recall:    [0.]
Per-class F1:        [0.]
Mean Precision: 0.0000, Mean Recall: 0.0000, Mean F1: 0.0000
Confusion Matrix:
[[0. 2.]
 [0. 1.]]
Per-class precision: [0.]
Per-class recall:    [0.]
Per-class F1:        [0.]
Mean Precision: 0.0000, Mean Recall: 0.0000, Mean F1: 0.0000
Confusion Matrix:
[[0. 1.]
 [1. 1.]]
Per-class precision: [0.]
Per-class recall:    [0.]
Per-class F1:        [0.]
Mean 

Evaluating images:  86%|████████▌ | 732/855 [00:00<00:00, 1389.24it/s]

Confusion Matrix:
[[0. 2.]
 [1. 1.]]
Per-class precision: [0.]
Per-class recall:    [0.]
Per-class F1:        [0.]
Mean Precision: 0.0000, Mean Recall: 0.0000, Mean F1: 0.0000
Confusion Matrix:
[[0. 1.]
 [0. 1.]]
Per-class precision: [0.]
Per-class recall:    [0.]
Per-class F1:        [0.]
Mean Precision: 0.0000, Mean Recall: 0.0000, Mean F1: 0.0000
Confusion Matrix:
[[0. 1.]
 [1. 1.]]
Per-class precision: [0.]
Per-class recall:    [0.]
Per-class F1:        [0.]
Mean Precision: 0.0000, Mean Recall: 0.0000, Mean F1: 0.0000
Confusion Matrix:
[[0. 1.]
 [1. 1.]]
Per-class precision: [0.]
Per-class recall:    [0.]
Per-class F1:        [0.]
Mean Precision: 0.0000, Mean Recall: 0.0000, Mean F1: 0.0000
Confusion Matrix:
[[0. 1.]
 [0. 1.]]
Per-class precision: [0.]
Per-class recall:    [0.]
Per-class F1:        [0.]
Mean Precision: 0.0000, Mean Recall: 0.0000, Mean F1: 0.0000
Confusion Matrix:
[[0. 1.]
 [1. 1.]]
Per-class precision: [0.]
Per-class recall:    [0.]
Per-class F1:        [0.]
Mean 

Evaluating images: 100%|██████████| 855/855 [00:00<00:00, 1401.73it/s]


Confusion Matrix:
[[0. 1.]
 [1. 1.]]
Per-class precision: [0.]
Per-class recall:    [0.]
Per-class F1:        [0.]
Mean Precision: 0.0000, Mean Recall: 0.0000, Mean F1: 0.0000
Confusion Matrix:
[[0. 1.]
 [1. 1.]]
Per-class precision: [0.]
Per-class recall:    [0.]
Per-class F1:        [0.]
Mean Precision: 0.0000, Mean Recall: 0.0000, Mean F1: 0.0000
Confusion Matrix:
[[0. 1.]
 [1. 1.]]
Per-class precision: [0.]
Per-class recall:    [0.]
Per-class F1:        [0.]
Mean Precision: 0.0000, Mean Recall: 0.0000, Mean F1: 0.0000

📊 Processed: 798, Skipped: 57
Confusion Matrix:
[[  0. 885.]
 [670. 801.]]
Per-class precision: [0.]
Per-class recall:    [0.]
Per-class F1:        [0.]
Mean Precision: 0.0000, Mean Recall: 0.0000, Mean F1: 0.0000

✅ Evaluation complete.
📈 Mean IoU: 0.0000
📈 Precision: 0.0000, Recall: 0.0000, F1: 0.0000
📈 mAP@50: 0.0000, mAP@75: 0.0000
💾 Saved detailed metrics to evaluation_report.csv


In [13]:
import pandas as pd
from datetime import datetime

def generate_markdown_report(results, dataset_path, output_dir, project_name, version):
    """
    Generate a markdown report similar to model_validation_report_20251025_211928.md
    """
    if not results:
        print("❌ No results to generate report")
        return None
    
    df = pd.DataFrame(results["per_image_metrics"])
    
    # Calculate summary statistics
    metrics = {
        "Precision": {
            "mean": df["precision"].mean(),
            "std": df["precision"].std(),
            "min": df["precision"].min(),
            "max": df["precision"].max()
        },
        "Recall": {
            "mean": df["recall"].mean(),
            "std": df["recall"].std(),
            "min": df["recall"].min(),
            "max": df["recall"].max()
        },
        "Mean IoU": {
            "mean": df["miou"].mean(),
            "std": df["miou"].std(),
            "min": df["miou"].min(),
            "max": df["miou"].max()
        }
    }
    
    # Generate timestamp
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    
    # Generate markdown content
    markdown = f"""# Model Validation Report

**Generated on:** {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}

## Dataset Information
- **Path:** Dataset: {dataset_path}
- **Total Images:** {results['overall_metrics']['total_images']}
- **Note:** All models are tested against the same common dataset

## Model Results

### Model: {project_name}/{version}

| Metric | Mean | Std Dev | Min | Max |
|--------|------|---------|-----|-----|
| Precision | {metrics['Precision']['mean']:.3f} | {metrics['Precision']['std']:.3f} | {metrics['Precision']['min']:.3f} | {metrics['Precision']['max']:.3f} |
| Recall | {metrics['Recall']['mean']:.3f} | {metrics['Recall']['std']:.3f} | {metrics['Recall']['min']:.3f} | {metrics['Recall']['max']:.3f} |
| Mean IoU | {metrics['Mean IoU']['mean']:.3f} | {metrics['Mean IoU']['std']:.3f} | {metrics['Mean IoU']['min']:.3f} | {metrics['Mean IoU']['max']:.3f} |

**Sample Count:** {results['overall_metrics']['total_images']}

## Model Comparison

| Model | Mean Precision | Mean Recall | Mean IoU | Sample Count |
|-------|----------------|-------------|----------|--------------|
| {project_name}/{version} | {metrics['Precision']['mean']:.3f} | {metrics['Recall']['mean']:.3f} | {metrics['Mean IoU']['mean']:.3f} | {results['overall_metrics']['total_images']} |

## Additional Metrics

- **mAP@50:** {results['overall_metrics']['mAP@50']:.3f}
- **mAP@75:** {results['overall_metrics']['mAP@75']:.3f}
- **mAP@50-95:** {results['overall_metrics']['mAP@50-95']:.3f}
- **F1 Score:** {results['overall_metrics']['f1']:.3f}
"""
    
    # Save markdown report
    report_filename = f"model_validation_report_{timestamp}.md"
    report_path = os.path.join(output_dir, report_filename)
    
    with open(report_path, "w") as f:
        f.write(markdown)
    
    print(f"📄 Generated markdown report: {report_filename}")
    print(f"📊 Report saved to: {report_path}")
    
    return report_path


# Generate the markdown report
if results:
    report_path = generate_markdown_report(
        results, 
        BASE_DIR, 
        OUTPUT_DIR, 
        PROJECT, 
        VERSION
    )
    if report_path:
        print("✅ Markdown report generated successfully!")


📄 Generated markdown report: model_validation_report_20251027_225900.md
📊 Report saved to: /home/emma/facultad/pps/validacion/numbers/container-number-pmov4/model_validation_report_20251027_225900.md
✅ Markdown report generated successfully!
