# High-level Validation
Let's compute summary statistics that compare the model's predictions against the ground truth on the validation set.

In [None]:
import os
import pickle

def _load_valid_pickles(root, valid_names):
    """Load the cached pickles under ``root`` that belong to the validation set.

    Args:
        root: Directory containing ``<stem>.pkl`` files.
        valid_names: Collection of file stems (names without extension) to keep.

    Returns:
        A list of unpickled objects, ordered by sorted filename so that two
        directories holding the same stems yield index-aligned lists.
    """
    loaded = []
    for filename in sorted(os.listdir(root)):
        if not filename.endswith(".pkl"):
            continue
        if os.path.splitext(filename)[0] not in valid_names:
            continue
        # NOTE(security): pickle.load can execute arbitrary code embedded in the
        # file -- only point this at caches produced by this project.
        # The `with` block closes each handle right away instead of leaving it
        # to the garbage collector.
        with open(os.path.join(root, filename), "rb") as handle:
            loaded.append(pickle.load(handle))
    return loaded


# Validation-set membership is keyed by file stem, so extensions are stripped.
with open("../saved_models/validlist_2022-08-01T23-30-48.txt") as validfile:
    validset = {os.path.splitext(f)[0] for f in validfile.read().splitlines()}

preds_root = "../cached/first_predicted"
actuals_root = "../cached/first_ground"
preds = _load_valid_pickles(preds_root, validset)
actuals = _load_valid_pickles(actuals_root, validset)

print(len(preds))
print(len(actuals))

# zip() stops at the shorter list, so check the lengths explicitly before
# checking that predictions and ground truth line up sample-by-sample.
assert len(preds) == len(actuals), (
    f"prediction/ground-truth count mismatch: {len(preds)} != {len(actuals)}"
)
assert all(pred["name"] == actual["name"] for pred, actual in zip(preds, actuals))
print(actuals[0].keys())

In [None]:

import pandas as pd
# Column store for the high-level comparison table: column name -> list of
# per-sample values (one entry per element of `preds` / `actuals`).
hl_df_dict = {}


def add_both(name, accessor):
    """Register one statistic for both the predictions and the ground truth.

    Applies ``accessor`` to every element of ``preds`` and of ``actuals`` and
    stores the resulting lists in ``hl_df_dict`` under ``"pred_" + name`` and
    ``"actual_" + name`` respectively, replacing any existing entries.

    Args:
        name: Suffix used to build the two column names.
        accessor: Callable taking one sample record and returning its value.
    """
    for prefix, rows in (("pred_", preds), ("actual_", actuals)):
        hl_df_dict[prefix + name] = list(map(accessor, rows))
def _horizon_part(position):
    """Build an accessor returning element ``position`` of a row's ``"horizon"``.

    The horizon value is iterable, so it is materialised as a tuple before
    being indexed.
    """
    def accessor(row):
        return tuple(row["horizon"])[position]
    return accessor


def _field(key):
    """Build an accessor returning ``row[key]`` unchanged."""
    def accessor(row):
        return row[key]
    return accessor


# Element 0 of the horizon is recorded as the intercept, element 1 as the slope.
add_both("intercept", _horizon_part(0))
add_both("slope", _horizon_part(1))
add_both("ice_amount", _field("ice_amount"))
add_both("closest_ice", _field("closest_ice"))

In [None]:
import numpy as np

from cv_experiments.seg2info import Seg2Info

s2i = Seg2Info()

# Processing / camera properties exposed by Seg2Info.
# NOTE(review): `logb` is presumably a logarithm with a project-specific base --
# confirm against Seg2Info.proc_props.
logb = s2i.proc_props["logb"]
far = s2i.cam_props["horizon_distance"]
near = s2i.cam_props["near_distance"]


def log_dist(dists):
    """Clamp ``dists`` to at most ``far`` and pass the result through ``logb``.

    Values greater than ``far`` are replaced by ``far``; everything else is
    forwarded unchanged.
    """
    clamped = np.where(dists > far, far, dists)
    return logb(clamped)


# Add the log-scaled closest-ice distance, then freeze the collected columns
# into a DataFrame for the evaluation cells below.
add_both("log_closest_ice", lambda row: log_dist(row["closest_ice"]))
hl_df = pd.DataFrame(hl_df_dict)
display(hl_df)

In [None]:
def eval_linear(name=None, format_1="", format_2="", actual=None, pred=None):
    """Print mean, standard deviation and RMSE for one predicted quantity.

    Args:
        name: Label used in the printed lines. When ``actual`` / ``pred`` are
            omitted it also selects the ``actual_<name>`` / ``pred_<name>``
            columns of ``hl_df``.
        format_1: Format spec applied to the two means and to the RMSE.
        format_2: Format spec applied to the two standard deviations.
        actual: Ground-truth values; must support ``.mean()``, ``.std()`` and
            element-wise subtraction (e.g. a pandas Series).
        pred: Predicted values, same requirements as ``actual``.

    Returns:
        None. The statistics are written to stdout, followed by a blank line.
    """
    if actual is None:
        actual = hl_df[f'actual_{name}']
    if pred is None:
        pred = hl_df[f'pred_{name}']

    mean_actual = actual.mean()
    print(f"Mean actual {name}: {mean_actual:{format_1}}")
    mean_pred = pred.mean()
    print(f"Mean predicted {name}: {mean_pred:{format_1}}")

    # The SD of the ground truth doubles as the yardstick for the RMSE below.
    sd = actual.std()
    print(f"SD actual {name}: {sd:{format_2}}")
    sd_pred = pred.std()
    print(f"SD predicted {name}: {sd_pred:{format_2}}")

    squared_error = (pred - actual) ** 2
    rmse = squared_error.mean() ** 0.5
    print(f"RMSE {name}: {rmse:{format_1}}")
    print(f"RMSE is {rmse/sd:.2%} of SD for {name}")
    print()
# Horizon-line parameters: slope means/RMSE in scientific notation, intercept
# statistics to one decimal place.
eval_linear(name="slope", format_1=".2E", format_2=".3f")
eval_linear(name="intercept", format_1=".1f", format_2=".1f")

Looks good! Now let's evaluate the actual output metrics:

In [None]:
# Keep only rows where both the actual and the predicted closest-ice distance
# are strictly below +inf.
# A stricter alternative, kept for reference, would also require both distances
# to lie strictly between `near` and `far`:
# moderate_mask = (hl_df["actual_closest_ice"] > near) & (hl_df["actual_closest_ice"] < far) & (hl_df["pred_closest_ice"] > near) & (hl_df["pred_closest_ice"] < far)
moderate_mask = (
    hl_df["actual_closest_ice"].lt(float("inf"))
    & hl_df["pred_closest_ice"].lt(float("inf"))
)


def filter_moderate(data_key):
    """Return column ``data_key`` of ``hl_df`` restricted to ``moderate_mask`` rows."""
    return hl_df.loc[moderate_mask, data_key]


eval_linear("ice_amount", ".3f", ".3f")
eval_linear("log_closest_ice", ".3f", ".3f")
# Same metric again, evaluated only on the rows selected by `moderate_mask`.
eval_linear(
    "log_closest_ice_moderate",
    ".3f",
    ".3f",
    actual=filter_moderate("actual_log_closest_ice"),
    pred=filter_moderate("pred_log_closest_ice"),
)

Interesting.