In [None]:
import training
import numpy as np
from paths import RESULTS_FILTERED_DIR, RESULTS_UNFILTERED_DIR
from model_selection import rank_models, plot_model_losses, haversine_np
from save_results import load_scaler, plot_samples
import pickle

In [None]:
# Ask the project's training module which compute device to use and echo the
# choice so it is visible in the notebook output.
device = training.determine_device()
print("Using device:", device)

In [None]:
# Validation metrics that can drive the ranking; the first entry ("val_mse")
# is the one used for the selection below.
metrics = ["val_mse", "val_rmse", "val_mae"]
ranking_metric = metrics[0]

# rank_models hands back the winning model's name, its score and its data.
best_model_name, best_score, best_model_data = rank_models(
    RESULTS_FILTERED_DIR,
    ranking_metric,
)

In [None]:
# Plot the losses of the best model, i.e. the one rank_models selected using
# the first metric in `metrics` ("val_mse").
# NOTE(review): what exactly gets plotted (train vs. validation curves, etc.)
# is decided inside model_selection.plot_model_losses — confirm there.

plot_model_losses(best_model_data)

In [None]:
# Relative path: resolved against the notebook's current working directory.
file_path = 'small_transformer_results.pkl'

# Deserialize the stored evaluation results.
# NOTE: unpickling can execute arbitrary code, so only load result files that
# were produced by this project.
with open(file_path, mode='rb') as results_file:
    results = pickle.load(results_file)


In [None]:
# Pull the evaluation entries out of the loaded results mapping in one go.
all_mse, x, y, y_pred = (
    results[key] for key in ("all_mse", "x", "y", "y_pred")
)

# Sample positions ordered from the lowest to the highest MSE.
sorted_indices = np.argsort(all_mse)
n_samples = len(all_mse)

In [None]:
# Percentile levels (in percent) at which the per-sample MSE values are summarised.
percentiles = [10, 30, 50, 80, 90, 95, 99, 99.99, 100]

print("\nMSE percentile summary:\n")

# A single call evaluates the threshold for every requested level at once.
mse_thresholds = np.percentile(all_mse, percentiles)
for p, threshold in zip(percentiles, mse_thresholds):
    print(f"{p:>3}% of samples have MSE ≤ {threshold:.6f}")

In [None]:
# Scaler used to undo the scaling of targets and predictions before measuring
# distances.
# NOTE(review): presumably the scaler fitted on the filtered dataset — confirm
# against save_results.load_scaler.
scaler = load_scaler(filtered=True)

# Number of consecutive samples (in ascending-MSE order) inspected per group.
SAMPLES_PER_GROUP = 3

# Start offset of each group inside sorted_indices. sorted_indices is ordered by
# ascending MSE, so offset 0 is the lowest-error sample.
group_starts = {
    "Best": 0,
    "Q1": n_samples // 4,
    "Median": n_samples // 2,
    "Q3": 3 * n_samples // 4,
}
groups = {
    name: sorted_indices[start:start + SAMPLES_PER_GROUP]
    for name, start in group_starts.items()
}
# The worst group is taken from the tail, i.e. the highest-MSE samples.
groups["Worst"] = sorted_indices[-SAMPLES_PER_GROUP:]

# Maps group name -> list of per-sample mean Haversine distances, in the same
# order as the group's indices.
group_means_dict = {}

print("\n=== Haversine Distance Evaluation by Groups ===")

for group_name, indices in groups.items():
    group_means = []

    print(f"\n### {group_name} group ###")

    for idx in indices:
        # Map the scaled target / prediction of this sample back through the
        # scaler before computing distances.
        # NOTE(review): assumes y[idx] and y_pred[idx] already have the layout
        # scaler.inverse_transform expects — confirm.
        y_true_unscaled = scaler.inverse_transform(y[idx])
        y_pred_unscaled = scaler.inverse_transform(y_pred[idx])

        # haversine_np's result is unpacked as (per-step distances, mean
        # distance); both are reported in km below.
        dists_km, mean_hav_km = haversine_np(y_true_unscaled, y_pred_unscaled)
        group_means.append(mean_hav_km)

        # --- Pretty step-wise print for groups ---
        print(f"\nSample {idx} (MSE={all_mse[idx]:.6f}) – Haversine per step:")
        for step, d in enumerate(dists_km, start=1):
            print(f"  Step {step:02d} → {d:10.6f} km")

        print(f"  -> Mean Haversine for this sample: {mean_hav_km:.6f} km")

    # Register the group exactly once, after all of its samples are processed.
    # Doing this outside the sample loop also guarantees that a group without
    # any indices still gets an (empty) entry instead of a missing key.
    group_means_dict[group_name] = group_means

In [None]:
# Pass x, y, y_pred, the per-sample MSE values, the group index selections and
# the per-group mean Haversine distances to plot_samples, together with the
# scaler.
plot_samples(
    x,
    y,
    y_pred,
    all_mse,
    groups,
    group_means_dict,
    scaler=scaler,
)