In [1]:
import os 
import glob
import time
import warnings
import numpy as np
from joblib import Parallel, delayed

from datasets import *
from benchmarks import *

In [2]:
# --- Experiment size / parallelism -------------------------------------------
reps = 10        # seeds: every benchmark is run for random_state in range(reps)
num_cores = 5    # n_jobs handed to joblib.Parallel

# --- Number of ridge terms ---------------------------------------------------
nterms_max = 20                            # `nterms` for SeqStein and ExNN
aim_nterms_list = [3, 5, 7, 10, 20]        # `nterms` for AIM
slfn_nterms_list = [10, 30, 50, 100, 200]  # `nterms` for SLFN

# --- Spline knots ------------------------------------------------------------
knot_num = 10            # passed to SeqStein and AIM
knot_dist = 'quantile'   # passed to AIM

# --- Regularisation ----------------------------------------------------------
reg_lambda = [0.1, 0.5, 0.9]   # passed to SeqStein
reg_gamma = "GCV"              # NOTE(review): not referenced by the cells shown here

# NOTE(review): `datanum` is not referenced by the cells shown here.
datanum = 10000

In [3]:
# Dataset names; each one is passed to load_regression_data() and also names
# its own sub-folder under ./results/.
data_list = ["wine_white", "wine_red"]

In [4]:
# Per-metric accumulators. For every dataset one single-row DataFrame is
# appended to each list; the cells below concatenate them into the final tables.
all_results_train = []
all_results_tune_time = []
all_results_optparam_time = []
all_results_ortho = []
all_results_test = []
all_results_nterms = []

# Labels attached to the seven entries of each per-model mean / std vector.
stat_index = ["train_metric", "validation_metric", "test_metric",
              'alltune_time_cost', 'optparam_time_cost', 'ortho_measure',
              'nterms']

# Keyword arguments shared by the two SeqStein and the two AIM variants.
seqstein_kwargs = dict(knot_num=knot_num, reg_lambda=reg_lambda, nterms=nterms_max)
aim_kwargs = dict(knot_dist=knot_dist, knot_num=knot_num, nterms=aim_nterms_list)

# One entry per benchmarked model, in run order (which is also the column order
# of the summary table):
#   label  - text used in the progress message
#   column - prefix of the "<column>-Mean" / "<column>-Std" summary columns
#   cache  - file name of the cached per-seed statistics inside the dataset folder
#   method - first positional argument of batch_parallel
#   kwargs - method-specific keyword arguments of batch_parallel
#   fallback_n_jobs (optional) - worker count for a single retry after a failure
benchmark_specs = [
    dict(label="SeqStein NonOrtho", column="SeqStein-NonOrtho",
         cache='seqstein_nonortho_stat.npy', method="SeqStein",
         kwargs=dict(seqstein_kwargs, ortho_enhance=[False])),
    dict(label="SeqStein Ortho", column="SeqStein-Ortho",
         cache='seqstein_ortho_stat.npy', method="SeqStein",
         kwargs=dict(seqstein_kwargs, ortho_enhance=[True])),
    dict(label="AIMLow", column="AIMLow",
         cache='aimlow_stat.npy', method="AIM",
         kwargs=dict(aim_kwargs, optlevel='low')),
    dict(label="AIMHigh", column="AIMHigh",
         cache='aimhigh_stat.npy', method="AIM",
         kwargs=dict(aim_kwargs, optlevel='high')),
    dict(label="SLFN", column="SLFN",
         cache='slfn_stat.npy', method="SLFN",
         kwargs=dict(nterms=slfn_nterms_list)),
    dict(label="ExNN", column="ExNN",
         cache='exnn_stat.npy', method="ExNN",
         kwargs=dict(nterms=nterms_max), fallback_n_jobs=2),
    dict(label="MLP", column="MLP",
         cache='mlp_stat.npy', method="MLP",
         kwargs={}),
]

# (row of the summary table, value written to the "Task" column, target list)
summary_specs = [
    ("train_metric", 'Regression', all_results_train),
    ("test_metric", 'Regression', all_results_test),
    ("alltune_time_cost", 'Time', all_results_tune_time),
    ("optparam_time_cost", 'Time', all_results_optparam_time),
    ("ortho_measure", 'Ortho', all_results_ortho),
    ('nterms', 'Nterms', all_results_nterms),
]


def _run_benchmark(method, data_loader, n_jobs, method_kwargs):
    """Call ``batch_parallel`` once per seed in ``range(reps)`` using joblib.

    Parameters
    ----------
    method : str
        Model name forwarded as the first argument of ``batch_parallel``.
    data_loader : callable
        Loader returned by ``load_regression_data``; forwarded unchanged.
    n_jobs : int
        Number of joblib workers.
    method_kwargs : dict
        Extra keyword arguments forwarded to ``batch_parallel``.

    Returns
    -------
    list
        One ``batch_parallel`` result per seed, in seed order.
    """
    return Parallel(n_jobs=n_jobs)(
        delayed(batch_parallel)(method, data_loader,
                                random_state=random_state,
                                data_type='real_data',
                                **method_kwargs)
        for random_state in range(reps))


def _ensure_cached(cache_path, method, data_loader, method_kwargs, fallback_n_jobs=None):
    """Compute and save the per-seed statistics unless ``cache_path`` already exists.

    The per-seed results are stacked with ``pd.concat`` and the underlying
    array is written with ``np.save``. When ``fallback_n_jobs`` is given, a
    failed run is reported and retried once with that many workers; otherwise
    the exception propagates.
    """
    if os.path.exists(cache_path):
        return
    try:
        per_seed = _run_benchmark(method, data_loader, num_cores, method_kwargs)
    except Exception as exc:
        # Deliberately not a bare ``except``: KeyboardInterrupt / SystemExit
        # must stop the notebook instead of triggering a retry.
        if fallback_n_jobs is None:
            raise
        print(method, "failed with", repr(exc), "- retrying with n_jobs =", fallback_n_jobs)
        per_seed = _run_benchmark(method, data_loader, fallback_n_jobs, method_kwargs)
    np.save(cache_path, pd.concat(per_seed).values)


def _summary_row(stat, metric, dataset, n_samples, n_features, task):
    """Return the ``metric`` column of ``stat`` as a one-row frame indexed by dataset.

    The row keeps one column per "<model>-Mean" / "<model>-Std" entry and gains
    the descriptive columns "#Samples", "#Features" and "Task".
    """
    row = stat[[metric]].T
    row["Dataset"] = dataset
    row["#Samples"] = n_samples
    row["#Features"] = n_features
    row["Task"] = task
    return row.set_index("Dataset")


for vb in data_list:
    data_loader = load_regression_data(vb)
    # Only train_x / test_x are used below (for the sample and feature counts).
    train_x, test_x, train_y, test_y, task_type, meta_info, get_metric = data_loader(0)

    print(vb)
    folder = "./results/" + vb + "/"
    os.makedirs(folder, exist_ok=True)

    # The timer is started once per dataset, so every "Time Cost" printed below
    # is cumulative since the first benchmark rather than per model.
    start = time.time()
    for spec in benchmark_specs:
        _ensure_cached(folder + spec["cache"], spec["method"], data_loader,
                       spec["kwargs"], spec.get("fallback_n_jobs"))
        print(spec["label"] + " Finished!", "Time Cost ", np.round(time.time() - start, 2), " Seconds!")

    # Always read back from disk so cached and freshly computed runs are treated alike.
    model_stats = {spec["column"]: np.load(folder + spec["cache"])
                   for spec in benchmark_specs}

    with warnings.catch_warnings():
        # nanmean / nanstd emit RuntimeWarning on all-NaN columns; the table
        # below shows such columns for MLP (ortho_measure, nterms).
        warnings.simplefilter("ignore", category=RuntimeWarning)
        summary = {}
        # All "-Mean" columns first, then all "-Std" columns, each in run order.
        for suffix, reducer in (("-Mean", np.nanmean), ("-Std", np.nanstd)):
            for column, values in model_stats.items():
                summary[column + suffix] = reducer(np.vstack(values), 0)
        stat = pd.DataFrame(summary, index=stat_index).T

    n_samples = train_x.shape[0] + test_x.shape[0]
    n_features = train_x.shape[1]
    for metric, task, sink in summary_specs:
        sink.append(_summary_row(stat, metric, vb, n_samples, n_features, task))

wine_white
SeqStein NonOrtho Finished! Time Cost  13.81  Seconds!
SeqStein Ortho Finished! Time Cost  22.41  Seconds!
AIMLow Finished! Time Cost  46.31  Seconds!




AIMHigh Finished! Time Cost  406.62  Seconds!
SLFN Finished! Time Cost  567.02  Seconds!
ExNN Finished! Time Cost  1621.09  Seconds!
MLP Finished! Time Cost  1704.35  Seconds!
wine_red
SeqStein NonOrtho Finished! Time Cost  1.98  Seconds!
SeqStein Ortho Finished! Time Cost  5.11  Seconds!
AIMLow Finished! Time Cost  18.35  Seconds!
AIMHigh Finished! Time Cost  217.05  Seconds!
SLFN Finished! Time Cost  243.66  Seconds!




ExNN Finished! Time Cost  923.08  Seconds!
MLP Finished! Time Cost  933.57  Seconds!


In [5]:
# Training error: stack the per-dataset rows, format them, save and display.
# The second argument (3) is presumably the number of decimals - the table
# rendered below shows three.
train_metric_frame = pd.concat(all_results_train)
results_train_latex = gen_reg_latex_results(train_metric_frame, 3)
results_train_latex.to_csv("./results/results_train_uci_reg.csv")
results_train_latex

Unnamed: 0_level_0,SeqStein-NonOrtho,SeqStein-Ortho,AIMLow,AIMHigh,SLFN,MLP,ExNN,#Samples,#Features
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
wine_white,0.703$\pm$0.013,0.695$\pm$0.013,0.718$\pm$0.013,$\mathbf{0.672}$$\pm$0.028,0.726$\pm$0.011,0.705$\pm$0.012,0.706$\pm$0.008,4898,11
wine_red,0.618$\pm$0.011,0.613$\pm$0.013,0.608$\pm$0.023,$\mathbf{0.591}$$\pm$0.026,0.639$\pm$0.011,0.638$\pm$0.013,0.617$\pm$0.009,1599,11


In [6]:
# Test error: stack the per-dataset rows, format them, save and display.
# The second argument (3) is presumably the number of decimals - the table
# rendered below shows three.
test_metric_frame = pd.concat(all_results_test)
results_test_latex = gen_reg_latex_results(test_metric_frame, 3)
results_test_latex.to_csv("./results/results_test_uci_reg.csv")
results_test_latex

Unnamed: 0_level_0,SeqStein-NonOrtho,SeqStein-Ortho,AIMLow,AIMHigh,SLFN,MLP,ExNN,#Samples,#Features
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
wine_white,0.732$\pm$0.022,$\mathbf{0.725}$$\pm$0.024,0.752$\pm$0.032,0.731$\pm$0.019,0.740$\pm$0.021,0.730$\pm$0.026,0.745$\pm$0.026,4898,11
wine_red,$\mathbf{0.622}$$\pm$0.016,0.625$\pm$0.022,0.648$\pm$0.028,0.639$\pm$0.018,0.629$\pm$0.017,0.627$\pm$0.012,0.623$\pm$0.020,1599,11


In [7]:
# time (include tuning cost)
# Built from the 'alltune_time_cost' rows collected in all_results_tune_time.
results_tune_time_latex = gen_reg_latex_results(pd.concat(all_results_tune_time), 3)
results_tune_time_latex.to_csv("./results/results_tune_time_uci_reg.csv")
results_tune_time_latex

Unnamed: 0_level_0,SeqStein-NonOrtho,SeqStein-Ortho,AIMLow,AIMHigh,SLFN,MLP,ExNN,#Samples,#Features
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
wine_white,4.519$\pm$2.010,$\mathbf{3.768}$$\pm$1.617,10.909$\pm$1.566,170.772$\pm$19.213,67.388$\pm$16.045,35.957$\pm$9.704,472.639$\pm$50.770,4898,11
wine_red,$\mathbf{0.828}$$\pm$0.337,1.385$\pm$0.663,5.952$\pm$1.383,93.177$\pm$11.311,10.751$\pm$3.074,4.403$\pm$2.550,295.948$\pm$40.211,1599,11


In [8]:
# time (for optimal set of hyper-parameters)
# Built from the 'optparam_time_cost' rows collected in all_results_optparam_time.
results_optparam_time_latex = gen_reg_latex_results(pd.concat(all_results_optparam_time), 3)
results_optparam_time_latex.to_csv("./results/results_optparam_time_uci_reg.csv")
results_optparam_time_latex

Unnamed: 0_level_0,SeqStein-NonOrtho,SeqStein-Ortho,AIMLow,AIMHigh,SLFN,MLP,ExNN,#Samples,#Features
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
wine_white,4.519$\pm$2.010,3.768$\pm$1.617,$\mathbf{3.615}$$\pm$1.195,65.624$\pm$45.646,12.519$\pm$6.988,35.957$\pm$9.704,71.631$\pm$16.530,4898,11
wine_red,$\mathbf{0.828}$$\pm$0.337,1.385$\pm$0.663,1.588$\pm$0.813,13.457$\pm$17.642,2.838$\pm$1.888,4.403$\pm$2.550,37.816$\pm$12.555,1599,11


In [9]:
# orthogonal measure
# Built from the 'ortho_measure' rows collected in all_results_ortho.
results_ortho_latex = gen_reg_latex_results(pd.concat(all_results_ortho), 3)
results_ortho_latex.to_csv("./results/results_ortho_uci_reg.csv")
results_ortho_latex

Unnamed: 0_level_0,SeqStein-NonOrtho,SeqStein-Ortho,AIMLow,AIMHigh,SLFN,MLP,ExNN,#Samples,#Features
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
wine_white,0.633$\pm$0.112,0.170$\pm$0.095,0.587$\pm$0.067,0.622$\pm$0.049,0.296$\pm$0.021,nan$\pm$nan,$\mathbf{0.000}$$\pm$0.000,4898,11
wine_red,0.270$\pm$0.064,0.150$\pm$0.079,0.369$\pm$0.074,0.339$\pm$0.096,0.298$\pm$0.010,nan$\pm$nan,$\mathbf{0.000}$$\pm$0.000,1599,11


In [10]:
# Number of ridge terms: stack the per-dataset rows, format them, save and display.
# The second argument (3) is presumably the number of decimals - the table
# rendered below shows three.
nterms_frame = pd.concat(all_results_nterms)
results_nterms_latex = gen_reg_latex_results(nterms_frame, 3)
results_nterms_latex.to_csv("./results/results_nterms_uci_reg.csv")
results_nterms_latex

Unnamed: 0_level_0,SeqStein-NonOrtho,SeqStein-Ortho,AIMLow,AIMHigh,SLFN,MLP,ExNN,#Samples,#Features
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
wine_white,14.300$\pm$5.533,11.400$\pm$4.758,16.700$\pm$5.100,14.000$\pm$6.245,20.000$\pm$10.000,nan$\pm$nan,$\mathbf{7.600}$$\pm$1.356,4898,11
wine_red,$\mathbf{4.800}$$\pm$2.088,7.200$\pm$4.445,13.000$\pm$6.066,7.200$\pm$4.792,43.000$\pm$53.675,nan$\pm$nan,6.500$\pm$2.335,1599,11
