In [1]:
import torch
import pandas as pd
import numpy as np
from bikebench.conditioning import conditioning
from bikebench.design_evaluation.design_evaluation import *
from bikebench.data_loading import data_loading
from bikebench.resource_utils import resource_path


  from .autonotebook import tqdm as notebook_tqdm


This notebook recomputes the reference point used for scoring. Running it will cause previously computed benchmarking scores to no longer be in sync with the new reference point.

In [2]:
def compute_ref_point(ref_scores, reduction, margin=0.05):
    """Compute a per-column reference point from a matrix of scores.

    Each column of ``ref_scores`` is reduced to a single value and then
    shifted upward by ``margin`` times the column's span (max - min).

    NaN entries are ignored in every reduction. The input array is never
    modified. A column consisting solely of NaNs yields NaN (NumPy emits a
    ``RuntimeWarning`` in that case).

    Args:
        ref_scores: Array-like of scores, reduced along axis 0.
        reduction: ``"max"`` for the column-wise maximum, or ``"meanabs"``
            for the column-wise mean of absolute values.
        margin: Fraction of the column span added to the reduced value.

    Returns:
        numpy.ndarray holding one reference value per column.

    Raises:
        ValueError: If ``reduction`` is not one of the supported names.
    """
    # No in-place NaN replacement: substituting -inf would both clobber the
    # caller's data and blow the span (and hence the result) up to +inf.
    scores = np.asarray(ref_scores)

    if reduction == "max":
        ref_point = np.nanmax(scores, axis=0)
    elif reduction == "meanabs":
        ref_point = np.nanmean(np.abs(scores), axis=0)
    else:
        raise ValueError(f"Unknown reduction: {reduction}")

    # add a small margin (5% of span by default) so ref is strictly worse
    # than all points whenever the column has a non-zero span
    span = np.nanmax(scores, axis=0) - np.nanmin(scores, axis=0)
    ref_point = ref_point + margin * np.abs(span)

    return ref_point


def recompute_ref_point(evaluator, eval_names, path, reduction, device):
    """Evaluate the loaded designs and persist a fresh reference point.

    Loads designs via ``data_loading.load_bike_bench_test()``, samples one
    condition of each kind per design with ``split="test"``, runs
    ``evaluator`` on them, reduces the resulting scores with
    ``compute_ref_point`` and writes the result to ``path`` as a
    header-less CSV.

    Args:
        evaluator: Callable taking ``(design_tensor, condition_dict)`` and
            returning a torch tensor of scores.
        eval_names: Labels used as the index of the returned Series.
        path: Destination of the CSV file (overwritten).
        reduction: Reduction name forwarded to ``compute_ref_point``.
        device: Torch device on which the design tensor is created.

    Returns:
        pandas.Series of reference values indexed by ``eval_names``.
    """
    print("Calculating reference point for scoring functions...")
    test_designs = data_loading.load_bike_bench_test()
    n_designs = test_designs.shape[0]

    # Keep this call order (riders, use case, embedding, text): the samplers
    # may share random state, so reordering could change the draws.
    riders = conditioning.sample_riders(n_designs, split="test")
    use_cases = conditioning.sample_use_case(n_designs, split="test")
    embeddings = conditioning.sample_embedding(n_designs, split="test")
    texts = conditioning.sample_text(n_designs, split="test")

    condition = {
        "Rider": riders,
        "Use Case": use_cases,
        "Text": texts,
        "Embedding": embeddings,
    }

    design_tensor = torch.tensor(test_designs.values, dtype=torch.float32, device=device)
    # Detach and move to host memory before handing the scores to NumPy.
    score_array = evaluator(design_tensor, condition).detach().cpu().numpy()

    ref_series = pd.Series(compute_ref_point(score_array, reduction), index=eval_names)
    ref_series.to_csv(path, header=False)
    return ref_series

In [3]:
# Run on the first CUDA GPU when one is available, otherwise on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# In this cell the training frame is only used for its column names (data.columns below).
data = data_loading.load_bike_bench_train()
# Standard evaluation set, with the aesthetics evaluation in "Embedding" mode.
evaluations_list = get_standard_evaluations(device, aesthetics_mode="Embedding")
# NOTE(review): is_objective / is_conditional are not used in this cell — confirm whether later cells need them.
evaluator, requirement_names, is_objective, is_conditional = construct_tensor_evaluator(evaluations_list, data.columns)

# Packaged resource file that recompute_ref_point overwrites via to_csv.
path = resource_path("misc/ref_point.csv")

# "max" reduction: column-wise maximum of the scores plus the default 5% span margin.
# As the last expression of the cell, the returned Series is shown as the cell output.
recompute_ref_point(evaluator, requirement_names, path, "max", device)

Calculating reference point for scoring functions...


Usability Score                                          1.005924e+00
Drag Force (N)                                           2.786199e+01
Knee Angle Error (deg.)                                  1.277838e+02
Hip Angle Error (deg.)                                   4.741717e+01
Arm Angle Error (deg.)                                   5.418302e+01
Arm Too Long for Bike                                   -2.197120e-01
Saddle Too Far From Handle                              -5.646414e-02
Torso Too Long for Bike                                 -3.668850e-01
Saddle Too Far From Crank                                2.208643e-01
Upper Leg Too Long for Bike                             -5.684242e-01
Lower Leg Too Long for Bike                             -1.531106e-01
Cosine Distance To Embedding                             3.997219e-01
Mass (kg)                                                1.434326e+01
Planar Compliance Score                                  9.962891e+00
Transverse Complianc