In [1]:
import sys
import os
# Put the parent of the current working directory on the import path so that
# modules located there can be imported from this notebook.
parent_dir = os.path.dirname(os.getcwd())
sys.path.append(parent_dir)
# Enable IPython's autoreload extension; mode 2 re-imports modules before each
# cell executes, so edits to source files are picked up without a kernel restart.
%reload_ext autoreload
%autoreload 2

import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Folder, relative to the notebook's parent directory, that holds the results.
result_dir = "eval_results"
# Sub-folder of `result_dir` containing the per-seed files of the run to summarise.
run_name = "esindy-noise05-growth"

In [3]:
def _nan_to_max(values):
    """Replace NaN entries of ``values`` in place with its largest non-NaN entry.

    Arrays without NaN, or consisting only of NaN (no maximum exists to
    substitute), are returned unchanged.
    """
    nan_mask = np.isnan(values)
    if nan_mask.any() and not nan_mask.all():
        values[nan_mask] = np.max(values[~nan_mask])
    return values


def _mean_std(values, scale=1.0):
    """Return ``(mean, std)`` of ``values`` scaled by ``scale``; ``(nan, nan)`` if empty."""
    if values.size == 0:
        # Avoid numpy's "mean of empty slice" warnings; the printed result is still nan.
        return np.nan, np.nan
    return np.mean(values) * scale, np.std(values) * scale


def aggregate_results(run_name, min_seed=0, max_seed=100, mse_multiplier=1.0, base_dir=None):
    """Print success rates and RMSE statistics aggregated over the seeds of a run.

    Every ``*.npz`` file in ``<base_dir>/<run_name>`` is treated as the result
    of one seed. The seed number is parsed from the file name by taking the
    part before the first ``.`` and dropping its first four characters
    (presumably a ``seed`` prefix, e.g. ``seed12.npz`` -- confirm against the
    code that writes these files). From each file the arrays ``correct_form``,
    ``mse``, ``correct_form_all`` and ``mse_all`` are read.

    Reported per equation and jointly ("All equations"):
      * the number of runs whose ``correct_form`` / ``correct_form_all`` entry
        is truthy,
      * mean and standard deviation (in parentheses) of ``sqrt(mse)`` over the
        runs with the correct form, and over all loaded runs ("any").
    NaN RMSE values are replaced by the largest non-NaN RMSE of the same array
    before averaging; if every value is NaN they are left as NaN.

    Args:
        run_name: Name of the sub-directory holding the per-seed files.
        min_seed: Smallest seed to include (inclusive).
        max_seed: Upper bound on the seed (exclusive).
        mse_multiplier: Factor applied to every reported mean and std.
        base_dir: Directory containing ``run_name``. Defaults to
            ``os.path.join(parent_dir, result_dir)`` using the notebook-level
            variables of those names.

    Returns:
        None. All results are printed.

    Raises:
        ValueError: If a ``.npz`` file name does not yield an integer seed.
        FileNotFoundError: If the run directory does not exist.
    """
    if base_dir is None:
        # Fall back to the notebook-level configuration (looked up lazily).
        base_dir = os.path.join(parent_dir, result_dir)
    directory = os.path.join(base_dir, run_name)

    cf, mse, cf_all, mse_all = [], [], [], []
    # Sorted so that the load order does not depend on the file system.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith('.npz'):
            continue
        seed = int(filename.split('.')[0][4:])
        if seed >= max_seed or seed < min_seed:
            continue
        # The context manager closes the underlying zip file once the arrays are read.
        with np.load(os.path.join(directory, filename)) as res:
            cf.append(res['correct_form'])
            mse.append(res['mse'])
            cf_all.append(res['correct_form_all'])
            mse_all.append(res['mse_all'])
    print(f'Loaded results from {len(cf)} runs.')
    if not cf:
        # Nothing to aggregate; stacking an empty list would raise.
        return

    # Correct form for each equation
    cf = np.stack(cf)
    n_runs = cf.shape[0]
    cf_sum = np.sum(cf, axis=0).astype(int)
    for i, n_correct in enumerate(cf_sum):
        print(f'Equation {i} success rate = {n_correct}/{n_runs}')

    # Correct form for all equations
    cf_all = np.stack(cf_all)
    cf_all_sum = np.sum(cf_all).astype(int)
    print(f'Joint success rate = {cf_all_sum}/{n_runs}')

    # RMSE for each equation
    rmse = _nan_to_max(np.sqrt(np.stack(mse)))
    for i in range(rmse.shape[1]):
        valid = cf[:, i].astype(bool)
        mse_valid, std = _mean_std(rmse[valid, i], mse_multiplier)
        mse_any, std_any = _mean_std(rmse[:, i], mse_multiplier)
        print(f'Equation {i} RMSE = {mse_valid:.4f} ({std:.4f})')
        print(f'Equation {i} RMSE (any) = {mse_any:.4f} ({std_any:.4f})')

    # RMSE for all equations
    rmse_all = _nan_to_max(np.sqrt(np.stack(mse_all)))
    mse_all_valid, std = _mean_std(rmse_all[cf_all.astype(bool)], mse_multiplier)
    mse_all_any, std_any = _mean_std(rmse_all, mse_multiplier)
    print(f'All equations RMSE = {mse_all_valid:.4f} ({std:.4f})')
    print(f'All equations RMSE (any) = {mse_all_any:.4f} ({std_any:.4f})')

In [4]:
# Summarise the selected run over seeds 0-49 (max_seed is exclusive).
aggregate_results(run_name, max_seed=50)

Loaded results from 50 runs.
Equation 0 success rate = 50/50
Equation 1 success rate = 50/50
Joint success rate = 50/50
Equation 0 RMSE = 0.0181 (0.0098)
Equation 0 RMSE (any) = 0.0181 (0.0098)
Equation 1 RMSE = 0.0075 (0.0042)
Equation 1 RMSE (any) = 0.0075 (0.0042)
All equations RMSE = 0.0143 (0.0066)
All equations RMSE (any) = 0.0143 (0.0066)
