In [None]:
# ==Temporary: this notebook uses the experiment results as its data set. A dedicated data set still needs to be generated.==

In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from data_processing.process_experiments_results import experiments_results_to_empirical_vd_table, \
    add_state_likelihood_column, vd_table_to_empirical_voa_table

# Location of each experiment-results CSV, keyed by the experiment set name.
results_tables = dict(
    planner_2000='../../experiments_sim/results/experiments_planner_2000_results.csv',
    planner_200='../../experiments_sim/results/experiments_planner_200_results.csv',
    handmade='../../experiments_sim/results/experiments_handmade_results.csv',
)

# Stage 1: read every CSV into a DataFrame.
dataframes = {name: pd.read_csv(csv_path) for name, csv_path in results_tables.items()}

# Stage 2: turn the raw results into empirical VD tables ...
vd_tables = {
    name: experiments_results_to_empirical_vd_table(raw_results)
    for name, raw_results in dataframes.items()
}
# ... and extend each of them with the state likelihood column.
vd_tables = {
    name: add_state_likelihood_column(vd_table)
    for name, vd_table in vd_tables.items()
}

# Stage 3: derive the empirical VOA table from each VD table.
voa_tables = {
    name: vd_table_to_empirical_voa_table(vd_table)
    for name, vd_table in vd_tables.items()
}

# Show the table that the rest of the notebook works with.
voa_tables['planner_200']

In [None]:
# Build the regression data set: one row per unique belief_idx, keeping the belief
# parameters ("belief_mus", "belief_sigmas") and the target "empirical_baseline_value".
from ast import literal_eval


def _parse_literal(value):
    """Parse a string representation into a Python object; pass non-strings through.

    literal_eval raises on values that are not strings, so already-parsed
    entries are returned unchanged instead of crashing the cell.
    """
    return literal_eval(value) if isinstance(value, str) else value


data = (
    voa_tables['planner_200'][['belief_idx', 'belief_mus', 'belief_sigmas', 'empirical_baseline_value']]
    .drop_duplicates(subset='belief_idx')
    # .assign returns a new frame, so no column is written into a selection of the
    # VOA table (plain column assignment here can raise SettingWithCopyWarning).
    .assign(
        belief_mus=lambda frame: frame['belief_mus'].apply(_parse_literal),
        belief_sigmas=lambda frame: frame['belief_sigmas'].apply(_parse_literal),
    )
)
data

In [None]:
# Feature matrix: for every belief, join its mus and sigmas into one sequence and
# flatten that into a single 1-D feature vector.
belief_params = data[['belief_mus', 'belief_sigmas']]
X = np.array([
    np.array(mus + sigmas).ravel()
    for mus, sigmas in belief_params.itertuples(index=False, name=None)
])

# Regression target: the empirical baseline value of each belief.
y = data['empirical_baseline_value'].values

print("Feature matrix shape:", X.shape)
print("Target vector shape:", y.shape)

In [None]:
from sklearn.model_selection import cross_val_score, cross_validate
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.dummy import DummyRegressor

# Globally scaled copy of the features, kept for inspection and for any later cell
# that refers to it. It is deliberately NOT used for cross-validation below:
# a scaler fitted on all rows leaks statistics of the held-out folds into training.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Candidate regressors; the two dummy regressors serve as naive baselines.
models = {
    'Mean Baseline': DummyRegressor(strategy='mean'),
    'Median Baseline': DummyRegressor(strategy='median'),
    'Linear Regression': LinearRegression(),
    'Ridge': Ridge(),
    'Lasso': Lasso(),
    'ElasticNet': ElasticNet(),
    'Random Forest': RandomForestRegressor(random_state=42),
    'Gradient Boosting': GradientBoostingRegressor(random_state=42),
    'SVR': SVR()
}

# 5-fold cross-validation for each model. The scaler lives inside the pipeline, so it
# is re-fitted on the training part of every fold, and both metrics come from a single
# cross_validate run instead of fitting every model twice.
results = {}
for name, model in models.items():
    pipeline = make_pipeline(StandardScaler(), model)
    cv_scores = cross_validate(
        pipeline, X, y, cv=5,
        scoring=('neg_mean_squared_error', 'neg_mean_absolute_error'),
    )

    rmse_scores = np.sqrt(-cv_scores['test_neg_mean_squared_error'])  # per-fold MSE -> RMSE
    mae_scores = -cv_scores['test_neg_mean_absolute_error']  # negated MAE -> positive MAE

    results[name] = {
        'mean_rmse': rmse_scores.mean(),
        'std_rmse': rmse_scores.std(),
        'mean_mae': mae_scores.mean(),
        'std_mae': mae_scores.std()
    }

# Print results sorted by mean MAE (best model first)
print("Results sorted by Mean MAE:\n")
sorted_results = dict(sorted(results.items(), key=lambda x: x[1]['mean_mae']))
for name, metrics in sorted_results.items():
    print(f"{name}:")
    print(f"  Mean RMSE: {metrics['mean_rmse']:.4f} (±{metrics['std_rmse']:.4f})")
    print(f"  Mean MAE:  {metrics['mean_mae']:.4f} (±{metrics['std_mae']:.4f})")
    print()

# Next:
* More data: use the original data and take the belief after help.
* Augmentation: scramble blocks.