# Computing and Saving Test Metrics for Trained Models

In [None]:
# NOTE: execute this cell only once -- every run moves the working directory
# one more level up.
import os
os.chdir(os.pardir)

In [None]:
from pathlib import Path
import time
import pickle
import shutil
import polars as pl

from ultralytics import YOLO # type: ignore
from ultralytics import settings
from ultralytics.utils import metrics


In [None]:
# ANSI escape sequences used to colour console output:
# PURPLE switches the colour on, END resets the formatting.
PURPLE, END = "\033[0;35m", "\033[0m"

In [None]:
# YOLO setup

# Root directory of the datasets, plus the run and weight output directories
DATASETS_DIR = "A:\\datasets\\kandi"
RUNS_DIR = "./ultralytics/runs"
WEIGHTS_DIR = "./ultralytics/weights"

# Fail early when the datasets root is not available on this machine
datasets_root = Path(DATASETS_DIR)
if not datasets_root.exists():
    raise FileNotFoundError(
        "Please set path to existing folder. "
        f"Current doesn't exist: '{DATASETS_DIR}'"
    )

# Dataset description file passed to validation; it must exist as well
YAML_FILE = "./YAML/test_PP_matti.yaml"
yaml_file = Path(YAML_FILE)
if not yaml_file.exists():
    raise FileNotFoundError(
        "Please set path to existing YAML file. "
        f"Current doesn't exist: '{YAML_FILE}'"
    )

# Start from the default YOLO settings, then point them at our directories
settings.reset()
yolo_dirs = {
    "datasets_dir": DATASETS_DIR,
    "runs_dir": RUNS_DIR,
    "weights_dir": WEIGHTS_DIR,
}
settings.update(yolo_dirs)

In [None]:
# Collect the run directories: every folder below 'runs_puhti' that contains
# a '<run>/<subdir>/best.pt' file, ordered by the string form of that path.
# A run is kept only if each entry of `filters` occurs in its path string.
filters = [
    # 'long_runs',
    # '150ep',
    # 'normal',
]
best_weights = list(Path('runs_puhti').rglob('best.pt'))
best_weights.sort(key=str)
run_paths = [
    weights.parents[1]
    for weights in best_weights
    if all(flt in str(weights.parents[1]) for flt in filters)
]

### STEP 1: Compute metrics

In [None]:
# RUN: Compute metrics for test set
#
# For every run directory in `run_paths` this cell
#   1. validates `<run>/weights/best.pt` on the "test" split of YAML_FILE,
#   2. pickles the returned metrics object to
#      `<run>/test_split_metrics/metrics.pkl`,
#   3. looks below RUNS_DIR for the output folder of `.val()` and moves it
#      next to the pickle as `<run>/test_split_metrics/images`.
TEST_METRICS_FOLDER = "test_split_metrics"

# When False, runs that already have a metrics.pkl are skipped.
REDO = True
for i, run_path in enumerate(run_paths):
    model_path = run_path / "weights/best.pt"
    name = str(run_path).replace("\\", "/")  # forward slashes for display only
    print(f"  [{i+1:>2}/{len(run_paths)}] VALIDATING: \"{PURPLE}{name}{END}\"")

    # paths and check
    results_dir = run_path / TEST_METRICS_FOLDER
    results_pkl = results_dir / "metrics.pkl"
    if results_pkl.exists() and not REDO:
        print('  ... skipping')
        continue

    # validate (perf_counter is monotonic, so the elapsed time cannot be
    # distorted by system clock adjustments)
    start = time.perf_counter()
    model = YOLO(model_path)
    results: metrics.DetMetrics = model.val(
        data = YAML_FILE,
        split = "test",
        task = "detect",
        project = run_path,
        name = TEST_METRICS_FOLDER,
    )
    elapsed = time.perf_counter() - start
    print(f"  ... took {elapsed:.1f} sec")

    # save pickle
    results_pkl.parent.mkdir(exist_ok=True)
    print(f"pickling results to: '{results_pkl}'")
    with open(results_pkl, "wb") as f:
        pickle.dump(results, f)

    # move saved images to desired dir
    matches = list(Path(RUNS_DIR).rglob(f"*{run_path}/{TEST_METRICS_FOLDER}"))
    if not matches:
        # Nothing was found, so "not found" is the matching error type
        # (the previous FileExistsError signalled the opposite condition).
        raise FileNotFoundError("cant find images generated by .val().")
    images_path = matches[0]
    new_images_path = Path("./") / images_path.relative_to(f"{RUNS_DIR}/detect") / "images"
    print('moving images dir to:', new_images_path)
    # NOTE: an already existing images folder is left untouched; in that case
    # the freshly generated output stays where it was found under RUNS_DIR.
    if not new_images_path.exists():
        shutil.move(images_path, new_images_path)

### STEP 2: Metrics processing

In [None]:
# TEST: load the pickled metrics of the first run and show them
run_path = run_paths[0]
metrics_pkl_path = run_path / TEST_METRICS_FOLDER / "metrics.pkl"
metrics_pkl_path.exists()  # the returned flag is not used
with metrics_pkl_path.open('rb') as f:
    results: metrics.DetMetrics = pickle.load(f)
results

In [None]:
# Build one table holding the test metrics of every run and save it as CSV.
#
# Each run contributes one row. Besides "run_name" and "run_group", every
# column is named "<metric>_<class>", where <class> is an entry of
# `class_names`; "mean" is the average of the metric over the class rows.

col_names = "Class mAP50 mAP50-95 Box-P Box-R Box-F1".split()
class_names = "mean D00 D10 D20 D40".split()
metric_names = [col for col in col_names if col != "Class"]

# construct data dict (its key order fixes the column order of the table)
new_col_names = [
    f"{col}_{cls}"
    for col in metric_names
    for cls in class_names
]
data: "dict[str, list[float | None]]" = { k: [] for k in new_col_names }

# iterate
for run_path in run_paths:
    metrics_pkl_path = run_path / TEST_METRICS_FOLDER / "metrics.pkl"
    results: metrics.DetMetrics
    # NOTE: unpickling can execute arbitrary code -- only load metrics files
    # that you created yourself.
    with open(metrics_pkl_path, 'rb') as f:
        results = pickle.load(f)

    # get results df
    df = results.to_df()
    df = df[col_names]

    # compute mean row
    mean_row = (
        df
        .select(pl.exclude("Class").mean())
        .with_columns(pl.lit("mean").alias("Class"))
        .select(df.columns)   # reorder
    )
    df = pl.concat([df, mean_row], how="vertical")

    # add values to data{}: exactly one entry per column and run, so that all
    # columns keep the same length and stay aligned with `run_paths`
    for cls in class_names:
        cls_rows = df.filter(pl.col("Class") == cls)
        for col in metric_names:
            values = cls_rows[col].to_list()
            # a class that is absent from this run's table becomes a null cell
            data[f"{col}_{cls}"].append(values[0] if values else None)

# create new df and save csv
# Path components below 'runs_puhti' are taken via pathlib so that this works
# with any path separator: first component = group, second = run name.
run_paths_short = [ list(p.relative_to("runs_puhti").parts) for p in run_paths ]
run_names =  [ p[1] for p in run_paths_short ]
run_groups = [ p[0] for p in run_paths_short ]
d = { "run_name": run_names, "run_group": run_groups } | data # add run_name as col
df_all = pl.DataFrame(d)
Path("analytics").mkdir(parents=True, exist_ok=True)  # make sure the output folder exists
df_all.write_csv("analytics/metrics_table.csv")
df_all