In [1]:
from utils import *
from sklearn.datasets import make_friedman1, make_friedman2, make_friedman3
from scipy import optimize
from scipy.stats import spearmanr
from multiprocessing import Pool, cpu_count
from functools import partial
from itertools import repeat
import pandas as pd
# corrcoef_type = 'p'
# c_val_bounds = 'fixed' #(1e-9, 1e5) 

In [3]:
def runner(
    fried_num, 
    noise, 
    norm,
    c_val_bounds, 
    corrcoef_type,
    ntk_depth, 
    ansiotropic,
    white,
    opt_bounds = [0.0001, 10],
    opt_resets = 10,
    figs = False,
    diagnostic = False
    ):
    """Tune Laplace / RBF length scales so their residuals track an NTK's residuals.

    For every requested NTK depth, a GP with an NTK kernel is fitted to the
    training split of a Friedman dataset.  The length scale of a Matern(nu=1/2)
    ("LPK") kernel -- and, when ``norm`` is true, of a Matern(nu=inf) ("RBF")
    kernel -- is then searched so that the correlation between that GP's
    residuals and the NTK GP's residuals (both evaluated on ``X``) is maximal.

    Parameters
    ----------
    fried_num : int
        1, 2 or 3; selects ``make_friedman1`` / ``2`` / ``3``.
    noise : float
        Forwarded to the dataset generator.  A non-zero value combined with
        ``white`` adds a ``WhiteKernel`` term to every kernel.
    norm : bool
        Use the ``'norm'`` features returned by ``processing`` instead of the
        ``'orig'`` ones, and additionally run the RBF search.
    c_val_bounds : 'fixed' or (low, high)
        ``constant_value_bounds`` of the LPK / RBF ``ConstantKernel``.
    corrcoef_type : str
        ``'s'`` optimises the Spearman correlation; any other value (by
        convention ``'p'``) optimises the Pearson correlation.
    ntk_depth : int or iterable of int
        NTK depth(s) to evaluate.
    ansiotropic : bool
        (sic) Optimise one length scale per feature instead of a single scalar.
    white : bool
        See ``noise``.
    opt_bounds : (lower, upper)
        Bounds of the scalar (isotropic) search.  Restart ``i`` widens the
        upper bound to ``upper * 10**i``.  Not mutated.
    opt_resets : int
        Number of additional optimiser restarts.
    figs : bool
        Currently unused; kept so existing callers keep working.
    diagnostic : bool
        Print the fitted kernel on every objective evaluation.

    Returns
    -------
    dict
        Descriptive entries (``'Dataset'``, ``'Noise'``, ``'Norm'``,
        ``'Constant Value Bounds'``, ``'Corr Type'``) plus, per depth ``d``:
        ``lpk_d`` (optimal length scale), ``f_lpk_d`` (optimised correlation)
        and either ``fs_lpk_d`` (Spearman, when Pearson was optimised) or
        ``fp_lpk_d`` (Pearson, when Spearman was optimised).  The same keys
        with ``rbf`` instead of ``lpk`` are present when ``norm`` is true.

    Raises
    ------
    ValueError
        If ``fried_num`` is not 1, 2 or 3.
    """
    # Fail fast: previously the exception object was *returned*, which let an
    # invalid configuration slip through until a caller indexed the result.
    if fried_num not in (1, 2, 3):
        raise ValueError("fried_num must be either 1, 2, or 3")

    if isinstance(ntk_depth, (int, np.integer)):
        ntk_depth = (ntk_depth,)

    return_vals = {
        'Dataset' : f'Friedman {fried_num}',
        'Noise': bool(noise != 0.0),
        'Norm' : norm,
        'Constant Value Bounds' : 'fixed' if c_val_bounds == 'fixed' else 'unfixed',
        # The trailing space is relied upon by callers that build column names.
        'Corr Type' : 'Pearson ' if corrcoef_type == 'p' else 'Spearman '
        }
    print(
        f"Friedman {fried_num}, Noise: {noise}, Norm: {norm}, cv_bounds: {c_val_bounds}, corrcoef_type: {corrcoef_type}"
    )
    opt_lower, opt_upper = opt_bounds

    # Datasets
    make_dataset = {1: make_friedman1, 2: make_friedman2, 3: make_friedman3}[fried_num]
    X_init, y_init = make_dataset(noise=noise, random_state=18397425)

    datasets = processing(X_init, y_init)
    # Targets always come from the 'orig' entries; only the features switch.
    y, y_train = datasets['orig'][1], datasets['orig train'][1]
    feature_key = 'norm' if norm else 'orig'
    X = datasets[feature_key][0]
    X_train = datasets[feature_key + ' train'][0]

    n_features = X.shape[1]
    use_white = noise != 0.0 and bool(white)

    def _random_length_scales():
        """Draw one random length scale per feature (1-D array)."""
        return np.array([np.random.rand() * np.random.randint(1, 1000)
                         for _ in range(n_features)])

    def _stationary_kernel(nu):
        """Constant * Matern(nu) with a fixed length scale, plus white noise if enabled."""
        kernel = (
            ConstantKernel(
                constant_value=0.224**2,
                constant_value_bounds=c_val_bounds
            ) *
            Matern(
                nu=nu,
                # anisotropic -> product of 1-D kernels with different length scales
                length_scale=_random_length_scales() if ansiotropic else 1,
                # Fixed: the length scale is tuned by the correlation search
                # below, not by the GP's marginal-likelihood optimiser.
                length_scale_bounds='fixed'
            )
        )
        if use_white:
            kernel = kernel + WhiteKernel()
        return kernel

    def _make_gp(kernel, n_restarts):
        """Build a GP regressor with the settings shared by all models."""
        return GPR(kernel=kernel, alpha=1e-9, normalize_y=True,
                   n_restarts_optimizer=n_restarts, random_state=3480795)

    def _set_length_scale(gp, ell):
        """Set the Matern length scale, with or without the WhiteKernel wrapper."""
        try:
            # Constant * Matern
            gp.set_params(**{'kernel__k2__length_scale': ell})
        except ValueError:
            # (Constant * Matern) + White: the Matern sits one level deeper.
            gp.set_params(**{'kernel__k1__k2__length_scale': ell})

    def _neg_correlation(ell, gp, residual, label):
        """Objective: minus the correlation between `residual` and gp's residual."""
        _set_length_scale(gp, ell)
        gp.fit(X_train, y_train)
        mean = gp.predict(X)

        if diagnostic:
            print(label, gp.kernel_)

        if corrcoef_type == 's':
            return -spearmanr(residual, y - mean)[0]
        return -np.corrcoef(residual[:, 0], (y - mean)[:, 0])[0, 1]

    def _optimise_length_scale(gp, residual, label):
        """Run the initial search plus `opt_resets` restarts and keep the best result."""
        args = (gp, residual, label)
        if ansiotropic:
            # NOTE: this branch uses hard-coded bounds rather than `opt_bounds`;
            # kept as-is so existing results do not change.
            best = optimize.minimize(
                _neg_correlation, _random_length_scales(), args=args,
                bounds=tuple((0.00001, 10) for _ in range(n_features)))
            for i in range(opt_resets):
                # x0 is 1-D here (the restarts used to pass a (1, d) array,
                # whereas scipy documents x0 as shape (n,)).
                trial = optimize.minimize(
                    _neg_correlation, _random_length_scales(), args=args,
                    bounds=tuple((0.00001, 10**i) for _ in range(n_features)))
                if trial.fun < best.fun:
                    best = trial
        else:
            best = optimize.minimize_scalar(
                _neg_correlation, args=args, method='bounded',
                bounds=opt_bounds, options={'maxiter': 10000})
            for i in range(opt_resets):
                trial = optimize.minimize_scalar(
                    _neg_correlation, args=args, method='bounded',
                    bounds=[opt_lower, opt_upper*(10**i)], options={'maxiter': 10000})
                if trial.fun < best.fun:
                    best = trial
        return best

    def _search_and_record(tag, label, gp, depth, ntk_residual):
        """Optimise gp's length scale against the NTK residual and store the results."""
        best = _optimise_length_scale(gp, ntk_residual, f'{label} {depth}:')

        # Refit at the optimum to obtain the matching residuals.
        _set_length_scale(gp, best.x)
        gp.fit(X_train, y_train)
        opt_residual = y - gp.predict(X)

        return_vals[f'{tag}_{depth}'] = best.x
        return_vals[f'f_{tag}_{depth}'] = -best.fun

        # Also report the correlation type that was *not* optimised.
        if corrcoef_type == 's':
            return_vals[f'fp_{tag}_{depth}'] = np.corrcoef(
                ntk_residual[:, 0], opt_residual[:, 0])[0, 1]
        else:
            return_vals[f'fs_{tag}_{depth}'] = spearmanr(ntk_residual, opt_residual)[0]

    # The LPK model is built once and re-tuned for every depth.
    gp_lpk = _make_gp(_stationary_kernel(nu=1/2), n_restarts=0)

    for depth in ntk_depth:
        ntk = (
            ConstantKernel(
                constant_value=1,
                constant_value_bounds=(1e-9, 1e2)
            ) *
            NTK(depth=depth, c=2,
                bias=1e-1,
                bias_bounds=(1e-9, 1e0)
                )
        )
        if use_white:
            ntk = ntk + WhiteKernel()

        gp_ntk = _make_gp(ntk, n_restarts=9)
        gp_ntk.fit(X_train, y_train)
        print(f'NTK {depth} : ', gp_ntk.kernel_)
        ntk_residual = y - gp_ntk.predict(X)

        _search_and_record('lpk', 'LPK', gp_lpk, depth, ntk_residual)

        if norm:
            # A fresh RBF model per depth (new random start when anisotropic).
            gp_rbf = _make_gp(_stationary_kernel(nu=np.inf), n_restarts=0)
            _search_and_record('rbf', 'RBF', gp_rbf, depth, ntk_residual)

    return return_vals

In [5]:
# %%capture
# Quick smoke test: noiseless, un-normalised Friedman 1 and 2 with fixed
# constant value and Pearson optimisation, at two NTK depths.
depths = (3, 100)
params = [[fried_num, 0.0, False, 'fixed', 'p'] for fried_num in (1, 2)]

kparams = dict(ntk_depth=depths, ansiotropic=False, white=False)

# Serial alternative: test = [runner(*param, **kparams) for param in params]
with Pool(processes=cpu_count()) as p:
    test = p.starmap(partial(runner, **kparams), params)

# test = runner(*[        1,   0.15, True,      (1e-9, 1e5),  100,           'p'], 
#     diagnostic=False, ansiotropic=True, white=False)

Friedman 1, Noise: 0.0, Norm: False, cv_bounds: fixed, corrcoef_type: pFriedman 2, Noise: 0.0, Norm: False, cv_bounds: fixed, corrcoef_type: p

NTK 3 :  0.0113**2 * NTK(depth=3, c=2.000, bias=0.056)
NTK 3 :  0.441**2 * NTK(depth=3, c=2.000, bias=0.353)
NTK 100 :  0.000429**2 * NTK(depth=100, c=2.000, bias=0.002)
NTK 100 :  0.0718**2 * NTK(depth=100, c=2.000, bias=0.193)


In [6]:
test

[{'Dataset': 'Friedman 1',
  'Noise': False,
  'Norm': False,
  'Constant Value Bounds': 'fixed',
  'Corr Type': 'Pearson ',
  'lpk_3': 2.342221507166084,
  'f_lpk_3': 0.8883573030439164,
  'fs_lpk_3': 0.8252865286528652,
  'lpk_100': 0.23322660328530548,
  'f_lpk_100': 0.9956018205470821,
  'fs_lpk_100': 0.9329972997299729},
 {'Dataset': 'Friedman 2',
  'Noise': False,
  'Norm': False,
  'Constant Value Bounds': 'fixed',
  'Corr Type': 'Pearson ',
  'lpk_3': 30.48768093247719,
  'f_lpk_3': 0.6585293787937725,
  'fs_lpk_3': 0.5879987998799879,
  'lpk_100': 28.202666628402447,
  'f_lpk_100': 0.6476731538127664,
  'fs_lpk_100': 0.5200120012001199}]

## Experiment 0: Constant Value Optimization

In [7]:
# Full factorial grid of runner configurations.  Each row is
#   [fried_num, noise, norm, c_val_bounds, corrcoef_type]
# and a row's position in the list is
#   16*(fried_num - 1) + 8*(corr == 's') + 4*(noise != 0) + 2*norm + (bounds != 'fixed'),
# e.g. row 5 is [1, 0.15, False, (1e-9, 1e5), 'p'] and row 32 is
# [3, 0.0, False, 'fixed', 'p'].  (The hand-written table had row 32 listed
# after row 35, so its index comments did not match the list positions.)
params = [
    [fried_num, noise, norm, c_val_bounds, corrcoef_type]
    for fried_num in (1, 2, 3)
    for corrcoef_type in ('p', 's')
    for noise in (0.0, 0.15)
    for norm in (False, True)
    for c_val_bounds in ('fixed', (1e-9, 1e5))
]

depths = (3, 100)
kparams = {'ntk_depth': depths, 'ansiotropic': False, 'white': False}

In [8]:
# Index levels of the LPK results table: one row per
# (dataset, noise, norm, depth, constant-value-bounds) combination.
arrays_lpk = [
    ['Friedman 1', 'Friedman 2', 'Friedman 3'],
    [False, True],  # Noise
    [False, True],  # Norm
    depths,
    ['fixed', 'unfixed']
]

# The RBF table has the same levels minus "Norm".
arrays_rbf = [arrays_lpk[0][:], arrays_lpk[1][:], depths, arrays_lpk[4][:]]

index_lpk, index_rbf = (
    pd.MultiIndex.from_product(levels, names=level_names)
    for levels, level_names in (
        (arrays_lpk, ["Dataset", "Noise", "Norm", "Depth", "Constant Value Bounds"]),
        (arrays_rbf, ["Dataset", "Noise", "Depth", "Constant Value Bounds"]),
    )
)

# Empty (all-NaN) result tables, filled in by the experiment cells.
df_lpk, df_rbf = (
    pd.DataFrame(
        index=row_index,
        columns=['Pearson X', 'Spearman X', 'Pearson f(X)',
                 'Spearman f(Xp)', 'Spearman f(X)', 'Pearson f(Xs)'])
    for row_index in (index_lpk, index_rbf)
)


In [9]:
# %%capture
# Run every configuration in parallel; `results` keeps the order of `params`.
with Pool(processes=cpu_count()) as p:
    results = p.starmap(
        partial(runner, **kparams), params
    )

for out in results:
    corr = out['Corr Type']  # 'Pearson ' or 'Spearman ' (trailing space included)
    # Column / key prefix for the correlation type that was NOT optimised.
    if corr == 'Pearson ':
        cross_col, cross_key = "Spearman f(Xp)", 'fs'
    else:
        cross_col, cross_key = "Pearson f(Xs)", 'fp'

    # LPK results exist for every run and df_lpk has a "Norm" level, so they
    # are always recorded (they used to be dropped for normalised runs).
    # RBF results only exist for normalised runs.
    targets = [(df_lpk, 'lpk', (out["Dataset"], out["Noise"], out["Norm"]))]
    if out["Norm"]:
        targets.append((df_rbf, 'rbf', (out["Dataset"], out["Noise"])))

    for depth in depths:
        for frame, tag, row_prefix in targets:
            row = row_prefix + (depth, out["Constant Value Bounds"])
            frame.loc[row, corr + "X"] = out[f'{tag}_{depth}']
            frame.loc[row, corr + "f(X)"] = out[f'f_{tag}_{depth}']
            frame.loc[row, cross_col] = out[f'{cross_key}_{tag}_{depth}']


TypeError: runner() missing 2 required positional arguments: 'ansiotropic' and 'white'

In [None]:
# Snapshot the tables: plain assignment would only alias df_lpk / df_rbf, and
# the later experiments, which keep writing into those frames, would silently
# overwrite this "saved" result.
df_lpk_const, df_rbf_const = df_lpk.copy(), df_rbf.copy()

In [None]:
df_lpk_const

In [None]:
df_rbf_const

## Experiment 1, 1.5 - Noise optimization

In [12]:
# Noisy (noise = 0.15) configurations only, as keyword dictionaries for
# runner.  Ordered by dataset, then correlation type, then norm, then
# constant-value bounds.
params = [
    {
        'fried_num': fried_num,
        'noise': 0.15,
        'norm': norm,
        'c_val_bounds': c_val_bounds,
        'corrcoef_type': corrcoef_type,
    }
    for fried_num in (1, 2, 3)
    for corrcoef_type in ('p', 's')
    for norm in (False, True)
    for c_val_bounds in ('fixed', (1e-9, 1e5))
]

Pt. 1: No white kernel

In [13]:
%%capture
figs = []

for param in params:
    # One call evaluates every depth in `depths`, so runner is invoked once
    # per configuration (it used to be re-run inside the depth loop).
    out = runner(**param, ntk_depth=depths, ansiotropic=False, white=False, figs=False)

    corr = out['Corr Type']  # 'Pearson ' or 'Spearman ' (trailing space included)
    # Column / key prefix for the correlation type that was NOT optimised.
    if corr == 'Pearson ':
        cross_col, cross_key = "Spearman f(Xp)", 'fs'
    else:
        cross_col, cross_key = "Pearson f(Xs)", 'fp'

    # LPK results are always recorded (they used to be dropped for normalised
    # runs); RBF results only exist for normalised runs.
    targets = [(df_lpk, 'lpk', (out["Dataset"], out["Noise"], out["Norm"]))]
    if out["Norm"]:
        targets.append((df_rbf, 'rbf', (out["Dataset"], out["Noise"])))

    for depth in depths:
        for frame, tag, row_prefix in targets:
            row = row_prefix + (depth, out["Constant Value Bounds"])
            frame.loc[row, corr + "X"] = out[f'{tag}_{depth}']
            frame.loc[row, corr + "f(X)"] = out[f'f_{tag}_{depth}']
            frame.loc[row, cross_col] = out[f'{cross_key}_{tag}_{depth}']


In [None]:
# Snapshot (copy) rather than alias, so later experiments that keep writing
# into df_lpk / df_rbf cannot change these saved tables.
df_lpk_no_noise, df_rbf_no_noise = df_lpk.copy(), df_rbf.copy()

In [None]:
df_lpk_no_noise

In [None]:
df_rbf_no_noise

Pt. 2: White Noise but FIXED

In [101]:
# Snapshot (copy) rather than alias, so later experiments that keep writing
# into df_lpk / df_rbf cannot change these saved tables.
df_lpk_noise_fixed_1 = df_lpk.copy()
df_rbf_noise_fixed_1 = df_rbf.copy()

Optimizing the Pearson correlation does not improve the Spearman correlation, and optimizing the Spearman correlation does not improve the Pearson correlation either, for the most part.

In [102]:
df_lpk_noise_fixed_1

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Pearson X,Spearman X,Pearson f(X),Spearman f(Xp),Spearman f(X),Pearson f(Xs)
Dataset,Noise,Norm,Depth,Constant Value Bounds,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Friedman 1,False,False,3,fixed,2.342221,12212249.33151,0.888357,0.825083,0.853141,0.864595
Friedman 1,False,False,3,unfixed,2.342221,61332654.814438,0.888357,0.825083,0.843528,0.882056
Friedman 1,False,False,100,fixed,0.233227,0.238573,0.995602,0.932973,0.949487,0.995593
Friedman 1,False,False,100,unfixed,0.233227,0.238573,0.995602,0.932973,0.949487,0.995593
Friedman 1,False,True,3,fixed,4.527338,1046592.322171,0.999089,0.950759,0.969085,0.997143
Friedman 1,False,True,3,unfixed,4.527338,279.857155,0.999089,0.950759,0.951407,0.998594
Friedman 1,False,True,100,fixed,0.119707,0.13037,0.99925,0.984278,0.984446,0.999085
Friedman 1,False,True,100,unfixed,0.119707,0.13037,0.99925,0.984278,0.984446,0.999085
Friedman 1,True,False,3,fixed,0.875154,1.459072,0.734172,0.739274,0.74057,0.732967
Friedman 1,True,False,3,unfixed,7.225025,6.040632,0.825463,0.830471,0.829583,0.824785


In [103]:
df_rbf_noise_fixed_1

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Pearson X,Spearman X,Pearson f(X),Spearman f(Xp),Spearman f(X),Pearson f(Xs)
Dataset,Noise,Depth,Constant Value Bounds,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Friedman 1,False,3,fixed,0.333568,0.31932,0.967224,0.912895,0.933525,0.966375
Friedman 1,False,3,unfixed,0.333569,0.31932,0.967224,0.912895,0.933525,0.966375
Friedman 1,False,100,fixed,0.134175,0.155386,0.997816,0.966421,0.982682,0.99683
Friedman 1,False,100,unfixed,0.134175,0.155386,0.997816,0.966421,0.982682,0.99683
Friedman 1,True,3,fixed,0.458791,0.473989,0.777497,0.787387,0.787435,0.777447
Friedman 1,True,3,unfixed,1.147336,9.999995,0.931725,0.951575,0.75814,0.744626
Friedman 1,True,100,fixed,0.353275,3.819726,0.73573,0.88462,0.877816,0.719582
Friedman 1,True,100,unfixed,0.358709,9.999995,0.789346,0.894833,0.877792,0.718879
Friedman 2,False,3,fixed,71.022373,39.364459,0.586039,0.660054,0.665707,0.585042
Friedman 2,False,3,unfixed,150675.558706,361002.383175,0.606962,0.679916,0.674839,0.591534


Pt 3: White Noise but OPTIMIZED

In [None]:
%%capture

for param in params:
    # `param` is a dict of keyword arguments, so it must be unpacked with **
    # (unpacking with * would pass the dictionary's *keys* positionally).
    # One call evaluates every depth in `depths`.
    out = runner(**param, ntk_depth=depths, ansiotropic=False, white=True, figs=False)

    corr = out['Corr Type']  # 'Pearson ' or 'Spearman ' (trailing space included)
    # Column / key prefix for the correlation type that was NOT optimised.
    if corr == 'Pearson ':
        cross_col, cross_key = "Spearman f(Xp)", 'fs'
    else:
        cross_col, cross_key = "Pearson f(Xs)", 'fp'

    # LPK results are always recorded (they used to be dropped for normalised
    # runs); RBF results only exist for normalised runs.
    targets = [(df_lpk, 'lpk', (out["Dataset"], out["Noise"], out["Norm"]))]
    if out["Norm"]:
        targets.append((df_rbf, 'rbf', (out["Dataset"], out["Noise"])))

    for depth in depths:
        for frame, tag, row_prefix in targets:
            row = row_prefix + (depth, out["Constant Value Bounds"])
            frame.loc[row, corr + "X"] = out[f'{tag}_{depth}']
            frame.loc[row, corr + "f(X)"] = out[f'f_{tag}_{depth}']
            frame.loc[row, cross_col] = out[f'{cross_key}_{tag}_{depth}']


In [None]:
# Snapshot (copy) rather than alias, so later cells that keep writing into
# df_lpk / df_rbf cannot change these saved tables.
df_lpk_noise_optimized, df_rbf_noise_optimized = df_lpk.copy(), df_rbf.copy()

In [91]:
df_lpk_noise_optimized

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Pearson X,Spearman X,Pearson f(X),Spearman f(Xp),Spearman f(X),Pearson f(Xs)
Dataset,Noise,Norm,Depth,Constant Value Bounds,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Friedman 1,False,False,3,fixed,2.342221,12212249.33151,0.888357,0.825083,0.853141,0.864595
Friedman 1,False,False,3,unfixed,2.342221,61332654.814438,0.888357,0.825083,0.843528,0.882056
Friedman 1,False,False,100,fixed,0.233227,0.238573,0.995602,0.932973,0.949487,0.995593
Friedman 1,False,False,100,unfixed,0.233227,0.238573,0.995602,0.932973,0.949487,0.995593
Friedman 1,False,True,3,fixed,4.527338,1046592.322171,0.999089,0.950759,0.969085,0.997143
Friedman 1,False,True,3,unfixed,4.527338,279.857155,0.999089,0.950759,0.951407,0.998594
Friedman 1,False,True,100,fixed,0.119707,0.13037,0.99925,0.984278,0.984446,0.999085
Friedman 1,False,True,100,unfixed,0.119707,0.13037,0.99925,0.984278,0.984446,0.999085
Friedman 1,True,False,3,fixed,0.877504,3.819726,0.735696,0.740762,0.73661,0.727933
Friedman 1,True,False,3,unfixed,0.983672,0.76903,0.858571,0.840924,0.857234,0.856699


In [92]:
df_rbf_noise_optimized

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Pearson X,Spearman X,Pearson f(X),Spearman f(Xp),Spearman f(X),Pearson f(Xs)
Dataset,Noise,Depth,Constant Value Bounds,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Friedman 1,False,3,fixed,0.333568,0.31932,0.967224,0.912895,0.933525,0.966375
Friedman 1,False,3,unfixed,0.333569,0.31932,0.967224,0.912895,0.933525,0.966375
Friedman 1,False,100,fixed,0.134175,0.155386,0.997816,0.966421,0.982682,0.99683
Friedman 1,False,100,unfixed,0.134175,0.155386,0.997816,0.966421,0.982682,0.99683
Friedman 1,True,3,fixed,0.454959,0.438926,0.781856,0.792199,0.792547,0.781778
Friedman 1,True,3,unfixed,6.14364,9.999995,0.744626,0.75814,0.75814,0.744626
Friedman 1,True,100,fixed,0.364442,3.819726,0.737862,0.887141,0.877816,0.719576
Friedman 1,True,100,unfixed,6.143558,9.999995,0.718879,0.877792,0.877792,0.718879
Friedman 2,False,3,fixed,71.022373,39.364459,0.586039,0.660054,0.665707,0.585042
Friedman 2,False,3,unfixed,150675.558706,361002.383175,0.606962,0.679916,0.674839,0.591534


Note: as the RBF length scale tends to infinity, the fit becomes linear (a hyperplane).

## Experiment 2 - Anisotropic LPK/RBF kernels
* Friedman 1 Noisy Norm
* Friedman 2 Noiseless Nonnormed

In [16]:
# Index levels of the Experiment 2 LPK table: one row per
# (dataset, noise, norm, depth) combination.
arrays_lpk = [
    ['Friedman 1', 'Friedman 2', 'Friedman 3'],
    [False, True], # Noise
    [False, True], # Norm
    [3, 100], # Depths
]

# The RBF table has the same levels minus "Norm" (position 2).
arrays_rbf = [level[:] for position, level in enumerate(arrays_lpk) if position != 2]

index_lpk, index_rbf = (
    pd.MultiIndex.from_product(levels, names=level_names)
    for levels, level_names in (
        (arrays_lpk, ["Dataset", "Noise", "Norm", "Depth"]),
        (arrays_rbf, ["Dataset", "Noise", "Depth"]),
    )
)

# Empty (all-NaN) tables holding only correlation values.
df_lpk, df_rbf = (
    pd.DataFrame(
        index=row_index,
        columns=['Pearson f(X)', 'Spearman f(Xp)', 'Spearman f(X)', 'Pearson f(Xs)'])
    for row_index in (index_lpk, index_rbf)
)


In [17]:
df_lpk

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Pearson f(X),Spearman f(Xp),Spearman f(X),Pearson f(Xs)
Dataset,Noise,Norm,Depth,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Friedman 1,False,False,3,,,,
Friedman 1,False,False,100,,,,
Friedman 1,False,True,3,,,,
Friedman 1,False,True,100,,,,
Friedman 1,True,False,3,,,,
Friedman 1,True,False,100,,,,
Friedman 1,True,True,3,,,,
Friedman 1,True,True,100,,,,
Friedman 2,False,False,3,,,,
Friedman 2,False,False,100,,,,


In [18]:
% % capture
figs = []
params = [
    #    fried_num, noise,  norm, c_val_bounds, ntk_depth, corrcoef_type
    [1,   0.0, False,  (1e-9, 1e5),  [3, 100],           'p'],  # 1
    [1,   0.0,  True,  (1e-9, 1e5),  [3, 100],           'p'],  # 3
    [1,  0.15, False,  (1e-9, 1e5),  [3, 100],           'p'],  # 5
    [1,  0.15,  True,  (1e-9, 1e5),  [3, 100],           'p'],  # 7

    [1,   0.0, False,  (1e-9, 1e5),  [3, 100],           's'],  # 9
    [1,   0.0,  True,  (1e-9, 1e5),  [3, 100],           's'],  # 11
    [1,  0.15, False,  (1e-9, 1e5),  [3, 100],           's'],  # 13
    [1,  0.15,  True,  (1e-9, 1e5),  [3, 100],           's'],  # 15


    [2,   0.0, False,  (1e-9, 1e5),  [3, 100],           'p'],  # 17
    [2,   0.0,  True,  (1e-9, 1e5),  [3, 100],           'p'],  # 19
    [2,  0.15, False,  (1e-9, 1e5),  [3, 100],           'p'],  # 21
    [2,  0.15,  True,  (1e-9, 1e5),  [3, 100],           'p'],  # 23

    [2,   0.0, False,  (1e-9, 1e5),  [3, 100],           's'],  # 25
    [2,   0.0,  True,  (1e-9, 1e5),  [3, 100],           's'],  # 27
    [2,  0.15, False,  (1e-9, 1e5),  [3, 100],           's'],  # 29
    [2,  0.15,  True,  (1e-9, 1e5),  [3, 100],           's'],  # 31


    [3,   0.0, False,  (1e-9, 1e5),  [3, 100],           'p'],  # 33
    [3,   0.0,  True,  (1e-9, 1e5),  [3, 100],           'p'],  # 35
    [3,  0.15, False,  (1e-9, 1e5),  [3, 100],           'p'],  # 37
    [3,  0.15,  True,  (1e-9, 1e5),  [3, 100],           'p'],  # 39

    [3,   0.0, False,  (1e-9, 1e5),  [3, 100],           's'],  # 41
    [3,   0.0,  True,  (1e-9, 1e5),  [3, 100],           's'],  # 43
    [3,  0.15, False,  (1e-9, 1e5),  [3, 100],           's'],  # 45
    [3,  0.15,  True,  (1e-9, 1e5),  [3, 100],           's'],  # 47
]

for param in params:
    out = runner(*param, ansiotropic=True, figs=False)

    for depth in param[4]:
        df_lpk.loc[(out["Dataset"], out["Noise"], out["Norm"],   depth,
                    out['Corr Type'] + "X"] = out['lpk_' + depth]
        df_lpk.loc[(out["Dataset"], out["Noise"], out["Norm"],   depth,
                    out['Corr Type'] + "f(X)"] = out['f_lpk_' + depth]

        if out['Corr Type'] == 'Pearson ':
            df_lpk.loc[(out["Dataset"], out["Noise"], out["Norm"],   depth,
                        "Spearman f(Xp)"] = out['fs_lpk_' + depth]
        else:
            df_lpk.loc[(out["Dataset"], out["Noise"], out["Norm"],   depth,
                        "Pearson f(Xs)"] = out['fp_lpk_' + depth]

        if out["Norm"] == True:
            df_rbf.loc[(out["Dataset"], out["Noise"],   depth,
                        out['Corr Type'] + "X"] = out['rbf_' + depth]
            df_rbf.loc[(out["Dataset"], out["Noise"],   depth,
                        out['Corr Type'] + "f(X)"] = out['f_rbf_' + depth]
            if out['Corr Type'] == 'Pearson ':
                df_rbf.loc[(out["Dataset"], out["Noise"],   depth,
                            "Spearman f(Xp)"] = out['fs_rbf_' + depth]
            else:
                df_rbf.loc[(out["Dataset"], out["Noise"],   depth,
                            "Pearson f(Xs)"] = out['fp_rbf_' + depth]


SyntaxError: closing parenthesis ']' does not match opening parenthesis '(' on line 42 (1183362044.py, line 43)