In [1]:
import os
import re

import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from scipy.stats import spearmanr
from sklearn.linear_model import LogisticRegression

import plotter
import rollout_doc_file_processor

In [None]:
# Run configuration.
# dir_path: directory that is scanned for the *.json rollout documents of one training run.
# env_name: forwarded as `env=` to rollout_doc_file_processor.load_rollout_docs.
dir_path = "log/hopper-medium-v2/combo/seed_1_timestamp_25-0902-082145_benchmark/rollout_docs"
env_name = "Hopper-v2"

: 

In [None]:
# Collect every JSON rollout document found directly in dir_path.
# os.listdir() returns entries in arbitrary order, so the paths are sorted to keep
# the document order (and with it the row order of everything consolidated from
# these files) reproducible across runs and machines.
doc_paths = sorted(
    os.path.join(dir_path, f) for f in os.listdir(dir_path) if f.endswith('.json')
)

: 

In [None]:
# Load all rollout documents listed in doc_paths.
# Values are requested as plain lists (cast_to_nparray=False); the consolidation
# further down concatenates them before converting to NumPy arrays.
docs = rollout_doc_file_processor.load_rollout_docs(
    doc_paths,
    add_model_error_if_not_contained=True,
    env=env_name,
    cast_to_nparray=False,
    verbose=True,
)

: 

In [None]:
# add epoch info to docs
# Tag every transition of a rollout doc with the epoch parsed from its file name.
# NOTE(review): assumes load_rollout_docs returns one doc per path, in the same
# order as doc_paths - confirm.
for doc, path in zip(docs, doc_paths):
    # Match on the file name only, so an "epoch_<n>" fragment in a parent
    # directory name can never be picked up by mistake.
    match = re.search(r'epoch_(\d+)', os.path.basename(path))
    if match is None:
        # Skipping the doc would leave it without an "epoch" entry, making the
        # consolidated "epoch" column shorter than (and misaligned with) the others.
        raise ValueError(f"Cannot parse epoch number from rollout doc path: {path}")
    epoch = int(match.group(1))
    n = len(doc['obss'])
    # One epoch value per transition, so "epoch" lines up with the other per-transition lists.
    doc["epoch"] = [epoch] * n

: 

In [None]:
# Merge the per-file docs into a single dict: for each key, the values of all
# docs are concatenated in document order.
consolidated_doc = {}
for single_doc in docs:
    for field, entries in single_doc.items():
        consolidated_doc.setdefault(field, []).extend(entries)
# Rebind docs to None now that everything has been merged
# (presumably so the per-file docs can be garbage collected).
docs = None

: 

In [None]:
# Display which fields the consolidated document contains.
consolidated_doc.keys()

: 

In [None]:
# Convert each consolidated list to a NumPy array, one field at a time and in
# place, so `consolidated_doc` stays the same dict object.
for field in list(consolidated_doc):
    consolidated_doc[field] = np.array(consolidated_doc[field])

: 

In [None]:
    # doc = rollout_doc_file_processor.load_rollout_docs("log/hopper-medium-expert-v2/combo/seed_1_timestamp_25-0825-124532/rollout_docs/epoch_220_timesteps_219000_rollout_doc.json")
    # doc = doc[0]
    # doc.keys()

# `doc` is an alias (not a copy) of the consolidated data; the cells below read it through this name.
doc = consolidated_doc

: 

In [None]:
# Every key of `doc` counts as an uncertainty measure except the bookkeeping
# fields listed below. list.remove() raises ValueError if one of them is absent.
uncertainty_measures = list(doc.keys())
for non_measure_key in (
    'obss',
    'actions',
    'next_obss_predicted',
    'next_obss_real',
    'model_error_l2',
    'rewards_real',
    'epoch',
    'step_nr',
):
    uncertainty_measures.remove(non_measure_key)

uncertainty_measures

: 

In [None]:
# Bar chart of how each uncertainty measure correlates with the L2 model error.
plotter.plot_correlation_bars(
    doc,
    uncertainty_measures,
    error_key='model_error_l2',
    title='Correlation of Uncertainty Measures with Model Error',
    print_corr_values=True,
    fig_size=(10, 6),
)

: 

In [None]:
# Pairwise Pearson correlations between the uncertainty measures, shown as an
# annotated heatmap. Measures are stacked as columns, hence rowvar=False.
uncertainty_data = np.stack([doc[name] for name in uncertainty_measures], axis=1)
corr_matrix = np.corrcoef(uncertainty_data, rowvar=False)

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    corr_matrix,
    annot=True,
    fmt=".2f",
    cmap='coolwarm',
    xticklabels=uncertainty_measures,
    yticklabels=uncertainty_measures,
    ax=ax,
)
ax.set_title("Correlation Matrix of Uncertainty Measures")
fig.tight_layout()
plt.show()

: 

In [None]:
def plot_simple_scatter_correlation(doc, x_key, y_key):
    """Scatter two fields of ``doc`` against each other on log-log axes.

    The Pearson and Spearman correlation coefficients are computed on the raw
    (not log-transformed) values and shown in a text box in the bottom-right
    corner of the axes. The figure is displayed with ``plt.show()``; nothing
    is returned.

    Args:
        doc: Mapping from field name to array-like values.
        x_key: Key of the field plotted on the x-axis.
        y_key: Key of the field plotted on the y-axis.
    """
    xs, ys = doc[x_key], doc[y_key]

    pearson_r = np.corrcoef(xs, ys)[0, 1]
    spearman_r, _pvalue = spearmanr(xs, ys)

    fig, ax = plt.subplots(figsize=(6, 6))
    ax.scatter(xs, ys, s=0.5, alpha=0.5)
    ax.set_xlabel(x_key)
    ax.set_ylabel(y_key)
    ax.set_title(f"Scatter plot: {x_key} vs {y_key}")
    ax.set_xscale("log")
    ax.set_yscale("log")
    fig.tight_layout()

    # Correlation summary, anchored to the bottom-right corner in axes coordinates.
    annotation = f"Pearson r: {pearson_r:.3f}\nSpearman r: {spearman_r:.3f}"
    ax.text(
        0.98, 0.02, annotation,
        transform=ax.transAxes,
        fontsize=10,
        verticalalignment='bottom',
        horizontalalignment='right',
        bbox=dict(boxstyle="round,pad=0.3", facecolor="white", alpha=0.7),
    )

    plt.show()


: 

In [None]:
# One scatter plot per uncertainty measure against the L2 model error.
# "dimensionwise_ood_measure" uses the simple log-log scatter defined in this
# notebook; every other measure goes through plotter.plot_scatter_correlation.
for measure in uncertainty_measures:
    if measure == "dimensionwise_ood_measure":
        plot_simple_scatter_correlation(doc, measure, "model_error_l2")
        continue
    plotter.plot_scatter_correlation(
        x=doc[measure],
        y=doc["model_error_l2"],
        title=f"{measure} vs Model Error L2",
        xlabel=measure,
        ylabel="Model Error L2",
        bins=256,
        mark_percentile=70,
        fig_size=(6, 6),
        points_s=0.5,
        points_alpha=0.05,
    )


: 

In [None]:
# Break points of the piecewise function u() below.
# (Names suggest 1st/99th percentiles and data extremes - presumably; verify.)
x_min, p1 = -0.6, -0.35
p99, x_max = 0.2, 0.35

# Not referenced by u() in this cell.
u_max = 5


def u(x):
    """Piecewise-quadratic function of a scalar ``x``.

    Returns 0 for ``p1 <= x < p99``. Below ``p1`` the value grows
    quadratically and reaches 1 at ``x_min``; from ``p99`` upwards it grows
    quadratically and reaches 1 at ``x_max``. Values beyond ``x_min`` /
    ``x_max`` exceed 1 (no clipping is applied).
    """
    if x < p1:
        lower_fraction = (x - p1) / (x_min - p1)
        return lower_fraction**2
    if x < p99:
        return 0
    upper_fraction = (x - p99) / (x_max - p99)
    return upper_fraction**2

: 

In [None]:
# for u in uncertainty_measures:
#     if u != "dimensionwise_ood_measure":
#         plotter.plot_scatter_correlation(
#             x=doc[u],
#             y=doc["rewards_real"],
#             title=f"{u} vs Rewards Real",
#             xlabel=u,
#             ylabel="Rewards Real",
#             bins=256,
#             mark_percentile=70,
#             fig_size=(6, 6),
#             points_s=0.5,
#             points_alpha=0.05
#         )
#     else:
#         plot_simple_scatter_correlation(doc, u, "rewards_real")

: 

In [None]:
# Display the fields available in `doc`.
doc.keys()

: 

#### Fit OLS Model

In [None]:
# Feature matrix for the regressions below: the uncertainty measures are stacked
# and transposed (one column per measure, assuming each measure is a 1-D array).
stacked_measures = np.stack([doc[name] for name in uncertainty_measures])
unc_data = np.transpose(stacked_measures)
# Regression target: the L2 model error.
target = doc["model_error_l2"]
unc_data.shape, target.shape

: 

In [None]:
# Fit a ridge-regularised linear model (no intercept term) that predicts the
# L2 model error from the uncertainty measures:
#     beta = (X^T X + reg_strength * I)^-1 X^T y
# The penalty has to be added on the diagonal only; adding the bare scalar to
# X^T X would shift every entry of the Gram matrix instead of regularising it.
# np.linalg.solve is used rather than forming the explicit inverse, which is
# numerically more stable.
reg_strength = 0.5
gram = unc_data.T @ unc_data
beta = np.linalg.solve(gram + reg_strength * np.eye(gram.shape[0]), unc_data.T @ target)
# In-sample predictions of the model error.
y_hat = unc_data @ beta
beta, y_hat.shape

: 

In [None]:
# Combined linear prediction (y_hat) against the true L2 model error.
plotter.plot_scatter_correlation(
    x=y_hat,
    y=doc["model_error_l2"],
    title="Combining Uncertainty Measures -OLS Error Prediction vs L2 Model Error",
    xlabel="OLS Error Prediction",
    ylabel="Model Error L2",
    bins=800,
    mark_percentile=70,
    fig_size=(6, 6),
    points_s=0.5,
    points_alpha=0.05,
)

: 

In [None]:
# Combined linear prediction (y_hat) against the real reward of each transition.
plotter.plot_scatter_correlation(
    x=y_hat,
    y=doc["rewards_real"],
    title="OLS Error Prediction using all uncertainty measures vs Real Reward",
    xlabel="OLS Error Prediction",
    ylabel="Real Reward",
    bins=800,
    mark_percentile=70,
    fig_size=(6, 6),
    points_s=0.5,
    points_alpha=0.05,
)

: 

=> There is no correlation between the discrepancy measures and the real rewards. Thus, filtering out data with a high discrepancy measure does not preferentially remove well-performing transitions.

## Logistic Regression on all Features as Filter Criterion - Target L2 Error Threshold: 0.1

In [None]:
# Setup for a logistic regression that learns a filter criterion for
# transitions whose L2 model error exceeds `threshold`.
threshold = 0.1

model_error = doc["model_error_l2"]

# Binary label: 1 if the model error is above the threshold, else 0.
targets_log_reg = (model_error > threshold).astype(int)

# Sample weight: square root of the absolute distance to the threshold,
# clipped to [0, 2], so samples far from the threshold weigh more.
distance_to_threshold = np.abs(model_error - threshold)
sample_weights = np.clip(distance_to_threshold**0.5, 0, 2)

targets_log_reg.shape, sample_weights.shape

: 

In [None]:
# Distribution of the logistic-regression sample weights, with their mean marked.
fig, ax = plt.subplots(figsize=(8, 4))
ax.hist(sample_weights, bins=1000, color='skyblue', edgecolor='black')

mean_val = np.mean(sample_weights)
ax.axvline(mean_val, color='r', linestyle='--', linewidth=1, label=f"Mean: {mean_val:.3f}")

ax.set_xlabel("Sample Weights")
ax.set_ylabel("Count")
ax.set_title("Histogram of Sample Weights for Logistic Regression")
ax.legend()
plt.show()

: 

In [None]:
# Fit a weighted logistic regression on all uncertainty measures to predict
# whether the model error exceeds the threshold.
# (LogisticRegression is imported in the notebook's import cell at the top.)
log_reg = LogisticRegression(max_iter=1000)
log_reg.fit(unc_data, targets_log_reg, sample_weight=sample_weights)
# Display the learned coefficients of the fitted model.
log_reg.coef_

: 

In [None]:
# In-sample class predictions of the logistic regression
# (same feature matrix as used for fitting).
y_hat_log_reg = log_reg.predict(unc_data)
y_hat_log_reg.shape

: 

In [None]:
# Scatter of the linear (OLS) prediction against the true L2 model error on
# log-log axes, coloured by the logistic-regression decision.
x = y_hat
y = doc["model_error_l2"]

# Red: predicted class 1 (error above threshold, i.e. filtered out); green: accepted.
colors = np.where(y_hat_log_reg == 1, "red", "green")

fig, ax = plt.subplots(figsize=(7, 7))
ax.scatter(x, y, c=colors, s=2, alpha=0.1)
ax.set_xscale("log")
ax.set_yscale("log")
ax.set_xlabel("OLS prediction (log scale)")
ax.set_ylabel("model_error_l2 (log scale)")
ax.set_title("Accepted and Filtered Out Data - Displayed on OLS Prediction vs Model Error Plot")

# Proxy patches for the legend, since the scatter itself carries no labels.
# (mpatches is imported in the notebook's import cell at the top.)
ax.legend(
    handles=[
        mpatches.Patch(color='green', label='Accepted by Log-Reg'),
        mpatches.Patch(color='red', label='Filtered out by Log-Reg'),
    ],
    loc='upper left',
)

fig.tight_layout()
plt.show()

: 

In [None]:
# Display the shape of the logistic-regression predictions.
y_hat_log_reg.shape

: 

In [None]:
# Filtering analysis using the logistic-regression prediction as filter indicator.
plotter.plot_filtering_analysis(
    doc,
    filter_indicator=y_hat_log_reg,
    bins=800,
    fig_size=(22, 9),
    filter_criterion="Logistic Regression Prediction - trained to filter out errors above 0.1",
)

: 

### Heuristic Filter Criterion - Filter out iff dimensionwise_diff_with_std > 0.4 OR aleatoric > 0.35 OR dimensionwise_ood_measure > 2.2

In [None]:
# Heuristic filter: a transition is filtered out as soon as any one of three
# measures exceeds its threshold.
threshold_dimensionwise = 0.4
threshold_aleatoric = 0.35
threshold_ood = 2.2

exceeds_dimensionwise = doc["dimensionwise_diff_with_std"] > threshold_dimensionwise
exceeds_aleatoric = doc["aleatoric"] > threshold_aleatoric
exceeds_ood = doc["dimensionwise_ood_measure"] > threshold_ood

# Element-wise OR of the three boolean masks.
filter_indicator = exceeds_dimensionwise | exceeds_aleatoric | exceeds_ood
filter_indicator.shape

: 

In [None]:
# Number of transitions the logistic regression assigns to class 1 (filtered out).
# The array's own vectorised .sum() replaces the Python builtin sum(), which
# would iterate over the NumPy array element by element.
y_hat_log_reg.sum()

: 

In [None]:
# Filtering analysis using the heuristic threshold filter as filter indicator.
plotter.plot_filtering_analysis(
    doc,
    filter_indicator=filter_indicator,
    bins=600,
    fig_size=(22, 9),
    filter_criterion=(
        f"dimensionwise_diff_with_std > {threshold_dimensionwise}"
        f" OR aleatoric > {threshold_aleatoric}"
        f" OR dimensionwise_ood_measure > {threshold_ood}"
    ),
)

: 

In [None]:
# Per-step model error (all vs. accepted transitions) for the later epochs 225, 250 and 350.
plotter.plot_model_error_all_vs_accepted_per_step(
    doc,
    filter_indicator,
    epoch_vals=[225, 250, 350],
    title='Model Error Analysis With Performant Rollout Policy',
    statistics=["mean", "median", "P90"],
    curves_logscale=False,
)

: 

In [None]:
# Per-step model error (all vs. accepted transitions) for the early epochs 25 and 50.
plotter.plot_model_error_all_vs_accepted_per_step(
    doc,
    filter_indicator,
    epoch_vals=[25, 50],
    title='Model Error Analysis With Underfitted Rollout Policy',
    statistics=["mean", "median", "P90"],
    curves_logscale=False,
)

: 

=> The model error does not change much with the quality of the rollout policy (actor) when the same dynamics model is used.