In [1]:
import glob
import os
import time
import warnings

import numpy as np
import pandas as pd
from joblib import Parallel, delayed

from datasets import *
from benchmarks import *

In [2]:
# --- Experiment size ---------------------------------------------------------
reps = 10          # each benchmark is repeated for random_state in range(reps)
num_cores = 5      # n_jobs given to joblib.Parallel for the benchmark runs

# --- Number of ridge terms ---------------------------------------------------
nterms_max = 10                  # `nterms` passed to the SeqStein and ExNN runs
aim_nterms_list = [3, 5, 7, 10]  # `nterms` candidates passed to the AIM runs
slfn_nterms_list = [10, 30, 50]  # `nterms` candidates passed to the SLFN runs

# --- Spline / regularisation settings ----------------------------------------
knot_num = 10                    # passed to the SeqStein and AIM runs
knot_dist = "quantile"           # passed to the AIM runs
reg_lambda = [0.2, 0.3, 0.4]     # passed to the SeqStein runs

# The two settings below are not referenced by the cells shown in this notebook.
reg_gamma = "GCV"
datanum = 10000

In [3]:
# Dataset names; each one is handed to load_regression_data and gets its own
# "./results/<name>/" cache folder in the benchmark loop.
data_list = [
    "wine_white",
    "wine_red",
]

In [4]:
# One list per reported quantity; every dataset appends a single-row DataFrame.
all_results_train = []
all_results_tune_time = []
all_results_optparam_time = []
all_results_ortho = []
all_results_test = []
all_results_nterms = []

# Labels attached to the entries of each method's averaged statistics vector.
STAT_ROWS = ["train_metric", "validation_metric", "test_metric",
             'alltune_time_cost', 'optparam_time_cost', 'ortho_measure',
             'nterms']

# One entry per benchmark, in execution order:
# (cache-file stem, label used in the progress message, method name given to
#  batch_parallel, n_jobs to retry with after a failure (None = no retry),
#  method-specific keyword arguments).
BENCHMARK_SPECS = [
    ("seqstein_nonortho", "SeqStein NonOrtho", "SeqStein", None,
     dict(knot_num=knot_num, reg_lambda=reg_lambda,
          ortho_enhance=[False], nterms=nterms_max)),
    ("seqstein_ortho", "SeqStein Ortho", "SeqStein", None,
     dict(knot_num=knot_num, reg_lambda=reg_lambda,
          ortho_enhance=[True], nterms=nterms_max)),
    ("aimlow", "AIMLow", "AIM", None,
     dict(knot_dist=knot_dist, knot_num=knot_num, optlevel='low',
          nterms=aim_nterms_list)),
    ("aimhigh", "AIMHigh", "AIM", None,
     dict(knot_dist=knot_dist, knot_num=knot_num, optlevel='high',
          nterms=aim_nterms_list)),
    ("slfn", "SLFN", "SLFN", None, dict(nterms=slfn_nterms_list)),
    # ExNN is the only benchmark that is retried with fewer workers on failure.
    ("exnn", "ExNN", "ExNN", 2, dict(nterms=nterms_max)),
    ("mlp", "MLP", "MLP", None, dict()),
]


def _run_and_cache(stat_path, method, data_loader, fallback_n_jobs=None, **kwargs):
    """Run one benchmark over all random states and cache the result as .npy.

    Does nothing when `stat_path` already exists. Otherwise `batch_parallel`
    is called once per random state in range(reps) through joblib with
    n_jobs=num_cores, the returned frames are concatenated, and their
    `.values` are saved to `stat_path`.

    Parameters
    ----------
    stat_path : str
        Destination of the cached statistics array.
    method : str
        Method name forwarded as the first argument of `batch_parallel`.
    data_loader : object
        Loader forwarded as the second argument of `batch_parallel`.
    fallback_n_jobs : int or None
        If given, a failed run is repeated once with this many workers;
        if None the failure propagates.
    **kwargs
        Extra keyword arguments forwarded to `batch_parallel`.
    """
    if os.path.exists(stat_path):
        return

    def _run(n_jobs):
        return Parallel(n_jobs=n_jobs)(
            delayed(batch_parallel)(method, data_loader,
                                    random_state=random_state,
                                    data_type='real_data',
                                    **kwargs)
            for random_state in range(reps))

    try:
        stat = _run(num_cores)
    except Exception as err:
        # `Exception` rather than a bare `except:` so that KeyboardInterrupt
        # and SystemExit still stop the notebook instead of starting a retry.
        if fallback_n_jobs is None:
            raise
        print(method + " failed with n_jobs=" + str(num_cores) + " (" + repr(err)
              + "); retrying with n_jobs=" + str(fallback_n_jobs))
        stat = _run(fallback_n_jobs)
    np.save(stat_path, pd.concat(stat).values)


def _summary_row(stat, metric, task, dataset, n_samples, n_features):
    """Return the `metric` column of `stat` as a one-row frame indexed by dataset.

    The row keeps one column per "<method>-Mean"/"<method>-Std" entry of `stat`
    and gains the "#Samples", "#Features" and "Task" columns.
    """
    row = stat[[metric]].T
    row["Dataset"] = dataset
    row["#Samples"] = n_samples
    row["#Features"] = n_features
    row["Task"] = task
    return row.set_index("Dataset")


for vb in data_list:
    data_loader = load_regression_data(vb)
    train_x, test_x, train_y, test_y, task_type, meta_info, get_metric = data_loader(0)

    print(vb)
    folder = "./results/" + vb + "/"
    os.makedirs(folder, exist_ok=True)

    # The timer is started once per dataset, so every "Time Cost" printed below
    # is cumulative since the first benchmark of this dataset.
    start = time.time()
    for file_stem, label, method, fallback_n_jobs, method_kwargs in BENCHMARK_SPECS:
        _run_and_cache(folder + file_stem + '_stat.npy', method, data_loader,
                       fallback_n_jobs=fallback_n_jobs, **method_kwargs)
        print(label + " Finished!", "Time Cost ", np.round(time.time() - start, 2), " Seconds!")

    # Always read back from disk so cached and freshly computed runs are
    # handled identically.
    seqstein_nonortho_stat = np.load(folder + 'seqstein_nonortho_stat.npy')
    seqstein_ortho_stat = np.load(folder + 'seqstein_ortho_stat.npy')
    aimlow_stat = np.load(folder + 'aimlow_stat.npy')
    aimhigh_stat = np.load(folder + 'aimhigh_stat.npy')
    slfn_stat = np.load(folder + 'slfn_stat.npy')
    mlp_stat = np.load(folder + 'mlp_stat.npy')
    exnn_stat = np.load(folder + 'exnn_stat.npy')

    # Insertion order fixes the column order of the summary: all "-Mean"
    # columns first, then all "-Std" columns, methods in this order.
    method_stats = {"SeqStein-NonOrtho": seqstein_nonortho_stat,
                    "SeqStein-Ortho": seqstein_ortho_stat,
                    "AIMLow": aimlow_stat,
                    "AIMHigh": aimhigh_stat,
                    "SLFN": slfn_stat,
                    "ExNN": exnn_stat,
                    "MLP": mlp_stat}

    with warnings.catch_warnings():
        # nanmean/nanstd emit a RuntimeWarning for all-NaN columns; those
        # columns are expected here and simply yield NaN.
        warnings.simplefilter("ignore", category=RuntimeWarning)
        summary = {}
        for method_name, values in method_stats.items():
            summary[method_name + "-Mean"] = np.nanmean(np.vstack(values), 0)
        for method_name, values in method_stats.items():
            summary[method_name + "-Std"] = np.nanstd(np.vstack(values), 0)
        stat = pd.DataFrame(summary, index=STAT_ROWS).T

    n_samples = train_x.shape[0] + test_x.shape[0]
    n_features = train_x.shape[1]

    results_train = _summary_row(stat, "train_metric", 'Regression', vb, n_samples, n_features)
    results_test = _summary_row(stat, "test_metric", 'Regression', vb, n_samples, n_features)
    results_tune_time = _summary_row(stat, "alltune_time_cost", 'Time', vb, n_samples, n_features)
    results_optparam_time = _summary_row(stat, "optparam_time_cost", 'Time', vb, n_samples, n_features)
    results_ortho = _summary_row(stat, "ortho_measure", 'Ortho', vb, n_samples, n_features)
    results_nterms = _summary_row(stat, 'nterms', 'Nterms', vb, n_samples, n_features)

    all_results_train.append(results_train)
    all_results_tune_time.append(results_tune_time)
    all_results_optparam_time.append(results_optparam_time)
    all_results_test.append(results_test)
    all_results_ortho.append(results_ortho)
    all_results_nterms.append(results_nterms)

wine_white
SeqStein NonOrtho Finished! Time Cost  10.1  Seconds!
SeqStein Ortho Finished! Time Cost  16.44  Seconds!
AIMLow Finished! Time Cost  30.12  Seconds!
AIMHigh Finished! Time Cost  124.95  Seconds!
SLFN Finished! Time Cost  184.51  Seconds!




ExNN Finished! Time Cost  1697.72  Seconds!
MLP Finished! Time Cost  1713.53  Seconds!
wine_red
SeqStein NonOrtho Finished! Time Cost  2.23  Seconds!
SeqStein Ortho Finished! Time Cost  4.33  Seconds!
AIMLow Finished! Time Cost  11.0  Seconds!
AIMHigh Finished! Time Cost  61.06  Seconds!
SLFN Finished! Time Cost  77.06  Seconds!




ExNN Finished! Time Cost  1032.81  Seconds!
MLP Finished! Time Cost  1039.43  Seconds!


In [5]:
# Training error: stack the per-dataset "train_metric" rows, format them with
# gen_reg_latex_results (second argument 3 -- presumably the number of decimals;
# confirm against its definition), save the table as CSV and display it.
results_train_latex = gen_reg_latex_results(pd.concat(all_results_train), 3)
results_train_latex.to_csv("./results/results_train_uci_reg.csv")
results_train_latex

Unnamed: 0_level_0,SeqStein-NonOrtho,SeqStein-Ortho,AIMLow,AIMHigh,SLFN,MLP,ExNN,#Samples,#Features
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
wine_white,0.720$\pm$0.008,0.707$\pm$0.007,0.735$\pm$0.019,$\mathbf{0.683}$$\pm$0.018,0.726$\pm$0.011,0.723$\pm$0.022,0.707$\pm$0.011,4898,11
wine_red,0.621$\pm$0.008,0.617$\pm$0.015,0.629$\pm$0.016,$\mathbf{0.593}$$\pm$0.025,0.639$\pm$0.011,0.637$\pm$0.013,0.615$\pm$0.009,1599,11


In [6]:
# Test error: stack the per-dataset "test_metric" rows, format them with
# gen_reg_latex_results, save the table as CSV and display it.
results_test_latex = gen_reg_latex_results(pd.concat(all_results_test), 3)
results_test_latex.to_csv("./results/results_test_uci_reg.csv")
results_test_latex

Unnamed: 0_level_0,SeqStein-NonOrtho,SeqStein-Ortho,AIMLow,AIMHigh,SLFN,MLP,ExNN,#Samples,#Features
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
wine_white,0.746$\pm$0.026,$\mathbf{0.730}$$\pm$0.025,0.767$\pm$0.040,$\mathbf{0.730}$$\pm$0.021,0.740$\pm$0.021,0.738$\pm$0.018,0.735$\pm$0.031,4898,11
wine_red,0.626$\pm$0.016,0.630$\pm$0.023,0.640$\pm$0.021,0.630$\pm$0.017,0.628$\pm$0.018,$\mathbf{0.625}$$\pm$0.017,0.634$\pm$0.018,1599,11


In [7]:
# Time cost including hyper-parameter tuning: stack the per-dataset
# "alltune_time_cost" rows, format them, save the table as CSV and display it.
results_tune_time_latex = gen_reg_latex_results(pd.concat(all_results_tune_time), 3)
results_tune_time_latex.to_csv("./results/results_tune_time_uci_reg.csv")
results_tune_time_latex

Unnamed: 0_level_0,SeqStein-NonOrtho,SeqStein-Ortho,AIMLow,AIMHigh,SLFN,MLP,ExNN,#Samples,#Features
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
wine_white,$\mathbf{2.641}$$\pm$0.414,2.674$\pm$0.441,5.293$\pm$1.019,44.320$\pm$6.082,24.796$\pm$6.381,6.200$\pm$2.489,623.786$\pm$101.657,4898,11
wine_red,$\mathbf{0.775}$$\pm$0.326,0.874$\pm$0.454,2.783$\pm$0.516,23.058$\pm$3.280,5.422$\pm$2.370,2.150$\pm$0.910,406.997$\pm$47.171,1599,11


In [8]:
# Time cost for the optimal set of hyper-parameters: stack the per-dataset
# "optparam_time_cost" rows, format them, save the table as CSV and display it.
results_optparam_time_latex = gen_reg_latex_results(pd.concat(all_results_optparam_time), 3)
results_optparam_time_latex.to_csv("./results/results_optparam_time_uci_reg.csv")
results_optparam_time_latex

Unnamed: 0_level_0,SeqStein-NonOrtho,SeqStein-Ortho,AIMLow,AIMHigh,SLFN,MLP,ExNN,#Samples,#Features
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
wine_white,2.641$\pm$0.414,2.674$\pm$0.441,$\mathbf{1.539}$$\pm$0.615,20.035$\pm$5.698,9.092$\pm$3.143,6.200$\pm$2.489,107.387$\pm$20.231,4898,11
wine_red,0.775$\pm$0.326,0.874$\pm$0.454,$\mathbf{0.694}$$\pm$0.288,4.082$\pm$2.394,2.328$\pm$1.664,2.150$\pm$0.910,57.228$\pm$15.964,1599,11


In [9]:
# Orthogonality measure: stack the per-dataset "ortho_measure" rows, format
# them, save the table as CSV and display it.
results_ortho_latex = gen_reg_latex_results(pd.concat(all_results_ortho), 3)
results_ortho_latex.to_csv("./results/results_ortho_uci_reg.csv")
results_ortho_latex

Unnamed: 0_level_0,SeqStein-NonOrtho,SeqStein-Ortho,AIMLow,AIMHigh,SLFN,MLP,ExNN,#Samples,#Features
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
wine_white,0.584$\pm$0.100,0.081$\pm$0.006,0.602$\pm$0.085,0.597$\pm$0.078,0.296$\pm$0.021,nan$\pm$nan,$\mathbf{0.000}$$\pm$0.000,4898,11
wine_red,0.322$\pm$0.098,0.090$\pm$0.022,0.337$\pm$0.061,0.309$\pm$0.059,0.296$\pm$0.011,nan$\pm$nan,$\mathbf{0.000}$$\pm$0.000,1599,11


In [10]:
# Number of ridge terms: stack the per-dataset "nterms" rows, format them,
# save the table as CSV and display it.
results_nterms_latex = gen_reg_latex_results(pd.concat(all_results_nterms), 3)
results_nterms_latex.to_csv("./results/results_nterms_uci_reg.csv")
results_nterms_latex

Unnamed: 0_level_0,SeqStein-NonOrtho,SeqStein-Ortho,AIMLow,AIMHigh,SLFN,MLP,ExNN,#Samples,#Features
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
wine_white,8.600$\pm$1.855,8.600$\pm$1.855,7.600$\pm$3.137,9.400$\pm$1.200,20.000$\pm$10.000,nan$\pm$nan,$\mathbf{7.100}$$\pm$0.700,4898,11
wine_red,$\mathbf{4.600}$$\pm$2.375,5.300$\pm$2.685,6.300$\pm$2.934,5.300$\pm$1.900,26.000$\pm$12.000,nan$\pm$nan,6.400$\pm$1.960,1599,11
