In [None]:
from utils import *
from sklearn import metrics
from sklearn.datasets import make_friedman2, make_friedman3
import pandas as pd

In [None]:
def _fit_gp(kernel, X_train, y_train, X_test, alpha):
    """Fit a GP regressor using `kernel`; return `(fitted_gp, test_mean)`."""
    gp = GPR(kernel=kernel, alpha=alpha, normalize_y=True,
             n_restarts_optimizer=9, random_state=29834057)
    gp.fit(X_train, y_train)
    mean = gp.predict(X_test)
    print(gp.kernel_)
    return gp, mean


def _kernel_hyperparams(gp, noise, leaf_param):
    """
    Read the optimized hyperparameters of a fitted `Constant + K (+ White)` kernel.

    Returns `(constant_value, noise_level, K's leaf_param)`; `noise_level` is
    `None` when no `WhiteKernel` was added.
    """
    params = gp.kernel_.get_params()
    if noise:
        # Kernel is (Constant + K) + White: Constant and K sit under `k1`.
        return (
            params['k1__k1__constant_value'],
            params['k2__noise_level'],
            params[f'k1__k2__{leaf_param}'],
        )
    # Kernel is Constant + K.
    return params['k1__constant_value'], None, params[f'k2__{leaf_param}']


def _regression_scores(y_true, y_pred):
    """Return `{'rmse', 'r2'}` of `y_pred` against `y_true` (both flattened)."""
    y_true = y_true.ravel()
    y_pred = y_pred.ravel()
    return {
        # sqrt(MSE) rather than `squared=False`: that keyword is deprecated in
        # recent scikit-learn releases and rejected by the newest ones.
        'rmse' : np.sqrt(metrics.mean_squared_error(y_true, y_pred)),
        'r2' : metrics.r2_score(y_true, y_pred)
    }


def real_experiment(data, depths, alpha=1e-5):
    """
    Fit GP regressors with an NTK kernel (one per depth), a Laplace kernel and
    a Gaussian kernel on the same data and score their test predictions.

    Data format := `[X_train, y_train, X_test, y_test, ..., norm, noise, name : str]`

    The first four entries are read by position and the last three from the
    end of the sequence, so extra entries in between are ignored.
    `norm` is only reported; a truthy `noise` adds a `WhiteKernel` to every
    kernel.

    Parameters
    ----------
    data : sequence laid out as described above
    depths : iterable of NTK depths, one GP is fitted per depth
    alpha : value passed to every GP regressor as `alpha`

    Returns
    -------
    dict with keys
        `kernel`      : optimized hyperparameters, keyed by `ntk_{depth}`, `lap`, `gaus`
        `ntk_{depth}` : `rmse` / `r2` of the NTK GP of that depth
        `lap`, `gaus` : `rmse` / `r2` of the Laplace / Gaussian GPs
        `dataset`     : `name`, `norm`, `noise` and the test pair `[X_test, y_test]`
        `means`       : flattened test predictions, NTK depths first, then lap, gaus
    """
    X_train, y_train, X_test, y_test = data[0], data[1], data[2], data[3]
    norm, noise, name = data[-3], data[-2], data[-1]

    print(f'\n{name} :\nnorm  = {norm}\nnoise = {noise}\ndepth = {depths}')

    # One shared 'kernel' dict that every model adds to; reassigning it per
    # model would throw away the hyperparameters recorded so far.
    exp_data = {'kernel' : {}}


    #########################
    # Neural tangent Kernel #
    #########################


    means_n = []
    for depth in depths:
        ntk = ConstantKernel(constant_value=1) + NTK(depth=depth, bias=0.1)
        if noise:
            ntk = ntk + WhiteKernel(noise_level=0.1)

        gp_n, mean_n = _fit_gp(ntk, X_train, y_train, X_test, alpha)
        const_val_n, noise_lvl_n, bias = _kernel_hyperparams(gp_n, noise, 'bias')

        means_n.append(mean_n.ravel())

        exp_data['kernel'][f'ntk_{depth}'] = {
            'C' : const_val_n,
            'W' : noise_lvl_n,
            'depth' : depth,  # the depth of this model, not the whole sweep
            'bias' : bias
        }
        exp_data[f'ntk_{depth}'] = _regression_scores(y_test, mean_n)


    #########################
    #   Lap + Gaus Kernel   #
    #########################


    lap = ConstantKernel(constant_value=1) + Matern(nu=1/2, length_scale=1)
    gaus = ConstantKernel(constant_value=1) + Matern(nu=np.inf, length_scale=1)

    if noise:
        lap = lap + WhiteKernel(noise_level=0.1)
        gaus = gaus + WhiteKernel(noise_level=0.1)

    gp_l, mean_l = _fit_gp(lap, X_train, y_train, X_test, alpha)
    gp_g, mean_g = _fit_gp(gaus, X_train, y_train, X_test, alpha)

    const_val_l, noise_lvl_l, ell_l = _kernel_hyperparams(gp_l, noise, 'length_scale')
    const_val_g, noise_lvl_g, ell_g = _kernel_hyperparams(gp_g, noise, 'length_scale')


    #########################
    #         Data          #
    #########################


    exp_data['dataset'] = {
        'name' : name,
        'norm' : norm,
        'noise': noise,
        'test' : [X_test, y_test]
    }

    exp_data['means'] = (*means_n, mean_l.ravel(), mean_g.ravel())

    exp_data['kernel']['lap'] = {
        'C' : const_val_l,
        'W' : noise_lvl_l,
        'ell' : ell_l
    }
    exp_data['kernel']['gaus'] = {
        'C' : const_val_g,
        'W' : noise_lvl_g,
        'ell' : ell_g
    }

    exp_data['lap'] = _regression_scores(y_test, mean_l)
    exp_data['gaus'] = _regression_scores(y_test, mean_g)

    return exp_data

In [None]:
# Accumulators filled by the dataset cells below: one list for the
# row-normalized data, one for the standardized-then-normalized variant.
datasets, datasets_std = [], []

In [None]:
name = 'Friedman 2'
noise = 5

# The noisy draws reuse the seed of the clean draws and discard their inputs,
# i.e. this relies on the same seed reproducing the same X - TODO confirm.
X_train, y_train = make_friedman2(noise=0.0, random_state=18397425)
_, y_train_noisy = make_friedman2(noise=noise, random_state=18397425)
X_norm_train = normalize(X_train, axis=1)

X_test, y_test = make_friedman2(noise=0.0, random_state=30189745)
_, y_test_noisy = make_friedman2(noise=noise, random_state=30189745)
X_norm_test = normalize(X_test, axis=1)

# [X_train, y_train, X_test, y_test, X_draw, norm, noise, name]
datasets.append([X_norm_train, y_train, X_norm_test, y_test, X_test, True, 0.0, name])
datasets.append([X_norm_train, y_train_noisy, X_norm_test, y_test_noisy, X_test, True, 0.15, name])

# Standardize every feature with the TRAINING statistics. They are computed
# once, before X_train is touched: taking them from an already standardized
# X_train yields mean 0 / std 1 and would leave X_test unscaled.
train_mean = X_train.mean(axis=0)
train_std = X_train.std(axis=0)

# Rebind instead of writing in place, so the raw X_test stored as X_draw in
# `datasets` above is not modified through the shared array.
X_train = (X_train - train_mean) / train_std
X_test = (X_test - train_mean) / train_std

X_norm_train = normalize(X_train, axis=1)
X_norm_test = normalize(X_test, axis=1)

datasets_std.append([X_norm_train, y_train, X_norm_test, y_test, X_test, True, 0.0, name + ' Std'])
datasets_std.append([X_norm_train, y_train_noisy, X_norm_test, y_test_noisy, X_test, True, 0.15, name + ' Std'])

In [None]:
name = 'Friedman 3'
noise = 5

# The noisy draws reuse the seed of the clean draws and discard their inputs,
# i.e. this relies on the same seed reproducing the same X - TODO confirm.
X_train, y_train = make_friedman3(noise=0.0, random_state=18397425)
_, y_train_noisy = make_friedman3(noise=noise, random_state=18397425)
X_norm_train = normalize(X_train, axis=1)

X_test, y_test = make_friedman3(noise=0.0, random_state=30189745)
_, y_test_noisy = make_friedman3(noise=noise, random_state=30189745)
X_norm_test = normalize(X_test, axis=1)

# [X_train, y_train, X_test, y_test, X_draw, norm, noise, name]
datasets.append([X_norm_train, y_train, X_norm_test, y_test, X_test, True, 0.0, name])
datasets.append([X_norm_train, y_train_noisy, X_norm_test, y_test_noisy, X_test, True, 0.15, name])

# Standardize every feature with the TRAINING statistics. They are computed
# once, before X_train is touched: taking them from an already standardized
# X_train yields mean 0 / std 1 and would leave X_test unscaled.
train_mean = X_train.mean(axis=0)
train_std = X_train.std(axis=0)

# Rebind instead of writing in place, so the raw X_test stored as X_draw in
# `datasets` above is not modified through the shared array.
X_train = (X_train - train_mean) / train_std
X_test = (X_test - train_mean) / train_std

X_norm_train = normalize(X_train, axis=1)
X_norm_test = normalize(X_test, axis=1)

datasets_std.append([X_norm_train, y_train, X_norm_test, y_test, X_test, True, 0.0, name + ' Std'])
datasets_std.append([X_norm_train, y_train_noisy, X_norm_test, y_test_noisy, X_test, True, 0.15, name + ' Std'])

In [None]:
# Run `experiment` once per (dataset, depth) pair; results are ordered
# dataset-major, depth-minor.
depths = (3, 25, 100)
experiment_output = [
    experiment(dataset, depth=depth)
    for dataset in datasets
    for depth in depths
]

In [None]:
# Same sweep as for `datasets`, on the standardized variants.
depths = (3, 25, 100)
experiment_output_std = []
for dataset in datasets_std:
    for depth in depths:
        # `real_experiment` takes `depths` (plural), so calling it with
        # `depth=` raises TypeError, and its output has neither
        # exp['kernel']['depth'] nor 'pred_rmse' / 'pred_corr', which the
        # summary-table cell reads. Use the same per-depth `experiment` call
        # as the non-standardized sweep.
        experiment_output_std.append(experiment(dataset, depth=depth))

In [None]:
depths = (3, 25, 100)

# Rows: every dataset variant crossed with the noise flag.
dataset_names = ['Friedman 2', 'Friedman 2 Std', 'Friedman 3', 'Friedman 3 Std']
noise_flags = [False, True]
arrays = [dataset_names, noise_flags]
index = pd.MultiIndex.from_product(arrays, names=["Dataset", "Noise"])

# Columns: all RMSE columns first, then all correlation columns, one per depth.
metric_columns = [
    f'D{depth_value}{metric}'
    for metric in ('rmse', 'corr')
    for depth_value in depths
]

# Empty tables, filled in by the next cell.
df_gaus_sd = pd.DataFrame(index=index, columns=metric_columns)
df_lap_sd = pd.DataFrame(index=index, columns=metric_columns)

df_gaus_sd

In [None]:
# Copy the Laplace / Gaussian scores of every run (plain and standardized
# sweeps) into the summary tables, one cell per (dataset, noise flag, depth).
for exp in (*experiment_output, *experiment_output_std):
    depth = exp['kernel']['depth']
    name = exp['dataset']['name']
    noise = exp['dataset']['noise']

    # The table's "Noise" level holds booleans, the dataset stores a number.
    noisy = bool(noise != 0.0)

    if name == 'Nonpolynomial':
        name = 'Nonpoly'

    if exp['dataset']['norm']:
        row = (name, noisy)
        # Single .loc assignment instead of df[col][row] = value: chained
        # indexing may write to a temporary and is a no-op under pandas
        # Copy-on-Write.
        df_lap_sd.loc[row, f'D{depth}rmse'] = exp['lap']['pred_rmse']
        df_lap_sd.loc[row, f'D{depth}corr'] = exp['lap']['pred_corr']
        df_gaus_sd.loc[row, f'D{depth}rmse'] = exp['gaus']['pred_rmse']
        df_gaus_sd.loc[row, f'D{depth}corr'] = exp['gaus']['pred_corr']

In [None]:
# Display the summary table holding the 'lap' results.
df_lap_sd

In [None]:
# Display the summary table holding the 'gaus' results.
df_gaus_sd