In [1]:
from utils import *
from sklearn import metrics
import pandas as pd

# Experiment runner

In [2]:
def _fit_gp(kernel, data, alpha):
    """
    Fit a `GPR` with `kernel` on the training split (`data[0]`, `data[1]`),
    predict the test inputs (`data[2]`) and print the optimised kernel.

    Returns `(fitted_gp, test_predictions)`.
    """
    gp = GPR(kernel=kernel, alpha=alpha, normalize_y=True,
        n_restarts_optimizer=9, random_state=29834057)
    gp.fit(data[0], data[1])
    mean = gp.predict(data[2])
    print(gp.kernel_)
    return gp, mean


def _fitted_hyperparameters(kernel, noise, base_param):
    """
    Read `(constant_value, noise_level, <base_param>)` from a fitted kernel.

    The kernels are built as `Constant + base`, optionally `+ White`. With the
    white-noise term the `Constant + base` sum is nested one level deeper
    (under `k1__`) and the noise level sits at `k2__noise_level`; without it
    the noise level is reported as `None`.
    """
    params = kernel.get_params()
    prefix = 'k1__' if noise else ''
    return (
        params[f'{prefix}k1__constant_value'],
        params['k2__noise_level'] if noise else None,
        params[f'{prefix}k2__{base_param}']
    )


def _scores(y_true, y_pred):
    """Return `{'rmse', 'r2'}` computed on the flattened targets and predictions."""
    # np.asarray(...) accepts both pandas and numpy inputs; Series.ravel() is
    # deprecated in recent pandas.
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()
    return {
        # sqrt(MSE) instead of `squared=False`, which newer scikit-learn
        # releases no longer accept.
        'rmse' : np.sqrt(metrics.mean_squared_error(y_true, y_pred)),
        'r2' : metrics.r2_score(y_true, y_pred)
    }


def real_experiment(data, depths, alpha=1e-5):
    """
    Fit one NTK Gaussian process per entry of `depths`, plus a Laplace
    (Matern nu=1/2) and a Gaussian (Matern nu=inf) Gaussian process, on the
    training split and score each of them on the test split.

    Data format := `[X_train, y_train, X_test, y_test, norm : bool, noise : bool, name : str]`

    `noise` adds a `WhiteKernel` term to every kernel. `alpha` is passed
    straight to `GPR`.

    Outputs dictionary containing
    - `dataset` : name, norm / noise flags and the test split,
    - `means`   : tuple of flattened test predictions, NTK depths first (in
                  the order of `depths`), then Laplace, then Gaussian,
    - `kernel`  : fitted hyperparameters under `ntk_<depth>`, `lap`, `gaus`,
    - `ntk_<depth>`, `lap`, `gaus` : `rmse` and `r2` on the test split.
    """
    norm, noise, name = data[-3], data[-2], data[-1]

    print(f'\n{name} :\nnorm  = {norm}\nnoise = {noise}\ndepth = {depths}')

    exp_data = {
        'dataset' : {
            'name' : name,
            'norm' : norm,
            'noise': noise,
            'test' : [data[2], data[3]]
        },
        # Filled incrementally so the NTK entries are kept next to lap / gaus.
        'kernel' : {}
    }


    #########################
    # Neural tangent Kernel #
    #########################


    means_n = []
    for depth in depths:
        ntk = (
            ConstantKernel(constant_value=1) +
            NTK(depth=depth, bias=0.1)
        )
        if noise:
            ntk += WhiteKernel(noise_level=0.1)

        gp_n, mean_n = _fit_gp(ntk, data, alpha)
        const_val_n, noise_lvl_n, bias = _fitted_hyperparameters(
            gp_n.kernel_, noise, 'bias')

        means_n.append(mean_n.ravel())

        exp_data['kernel'][f'ntk_{depth}'] = {
            'C' : const_val_n,
            'W' : noise_lvl_n,
            'depth' : depth,
            'bias' : bias
        }
        exp_data[f'ntk_{depth}'] = _scores(data[3], mean_n)


    #########################
    #   Lap + Gaus Kernel   #
    #########################


    lap = (
        ConstantKernel(constant_value=1) +
        Matern(nu=1/2, length_scale=1)
    )

    gaus = (
        ConstantKernel(constant_value=1) +
        Matern(nu=np.inf, length_scale=1)
    )

    if noise:
        lap += WhiteKernel(noise_level=0.1)
        gaus += WhiteKernel(noise_level=0.1)

    gp_l, mean_l = _fit_gp(lap, data, alpha)
    gp_g, mean_g = _fit_gp(gaus, data, alpha)


    #########################
    #         Data          #
    #########################


    const_val_l, noise_lvl_l, ell_l = _fitted_hyperparameters(
        gp_l.kernel_, noise, 'length_scale')
    const_val_g, noise_lvl_g, ell_g = _fitted_hyperparameters(
        gp_g.kernel_, noise, 'length_scale')

    exp_data['means'] = (*means_n, mean_l.ravel(), mean_g.ravel())

    exp_data['kernel']['lap'] = {
        'C' : const_val_l,
        'W' : noise_lvl_l,
        'ell' : ell_l
    }
    exp_data['kernel']['gaus'] = {
        'C' : const_val_g,
        'W' : noise_lvl_g,
        'ell' : ell_g
    }

    exp_data['lap'] = _scores(data[3], mean_l)
    exp_data['gaus'] = _scores(data[3], mean_g)

    return exp_data

# Datasets

In [3]:
# Registry of experiment configurations consumed by `real_experiment`.
# Each entry has the form:
# [X_train, y_train, X_test, y_test, norm : bool, noise : bool, name : str]
datasets = []

In [4]:
# Forest Fires: regress `area` on every remaining column once `month` and
# `day` have been removed.
forest_fires = (
    pd.read_csv('./real_world_data/forestfires.csv', header=0)
    .drop(columns=['month', 'day'])
)

names_f = forest_fires.columns

X = forest_fires.drop(columns='area')
y = forest_fires['area']

# Two splits with the same seed: raw features first, then features passed
# through `normalize(..., axis=1)` (presumably per-row scaling -- confirm
# against `utils`). Each split is registered twice, without and with noise.
for norm_flag in (False, True):
    features = normalize(X, axis=1) if norm_flag else X
    X_train, X_test, y_train, y_test = train_test_split(
        features, y, test_size=0.25, random_state=879631245)
    datasets.extend(
        [X_train, y_train, X_test, y_test, norm_flag, noise_flag, 'Forest Fires']
        for noise_flag in (False, True)
    )

In [5]:
# Abalone: regress `rings + 1.5` on the numeric measurements (`sex` dropped).
abalone = pd.read_csv('./real_world_data/abalone.csv', header=None,
    names=[
        'sex',
        'length',
        'diameter',
        'height',
        'whole_weight',
        'shucked_weight',
        'shell_weight',
        'viscera_weight',
        'rings'
    ])

# Captured before `sex` is dropped, so it still lists all nine columns.
names_c = abalone.columns

abalone = abalone.drop(columns='sex')

X = abalone.drop(columns='rings')
# Build the target as a new Series. An in-place `+=` on the selected column
# may or may not write back into `abalone`, depending on the pandas version
# and Copy-on-Write mode.
# NOTE(review): the 1.5 offset presumably converts ring counts to age in
# years, as in the UCI dataset description -- confirm.
y = abalone['rings'] + 1.5

# Two splits with the same seed: raw features first, then features passed
# through `normalize(..., axis=1)` (presumably per-row scaling -- confirm
# against `utils`). Each split is registered twice, without and with noise.
for norm_flag in (False, True):
    features = normalize(X, axis=1) if norm_flag else X
    X_train, X_test, y_train, y_test = train_test_split(
        features, y, test_size=0.25, random_state=879631245)
    datasets.extend(
        [X_train, y_train, X_test, y_test, norm_flag, noise_flag, 'Abalone']
        for noise_flag in (False, True)
    )

# Results

In [6]:
# Row labels: every (dataset, metric, noise) combination.
arrays = [
    ["Abalone", "Forest Fires"],
    ['rmse', 'r2'],
    [False, True],  # Noise
]
index = pd.MultiIndex.from_product(arrays, names=['Dataset', 'Metric', 'Noise'])

# One column per fitted model.
model_columns = ['NTK D=3', 'NTK D=25', 'NTK D=100', 'Laplace', 'Gaussian']

# Two empty result tables with identical layout. Further down the notebook,
# `df_rd` is filled from the experiments with norm=False and `df_sd` from the
# experiments with norm=True.
df_rd, df_sd = (
    pd.DataFrame(index=index, columns=model_columns) for _ in range(2)
)

df_rd

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,NTK D=3,NTK D=25,NTK D=100,Laplace,Gaussian
Dataset,Metric,Noise,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Abalone,rmse,False,,,,,
Abalone,rmse,True,,,,,
Abalone,r2,False,,,,,
Abalone,r2,True,,,,,
Forest Fires,rmse,False,,,,,
Forest Fires,rmse,True,,,,,
Forest Fires,r2,False,,,,,
Forest Fires,r2,True,,,,,


In [7]:
# Run every registered dataset configuration through `real_experiment`,
# collecting one result dictionary per configuration, in `datasets` order.
# Results are appended one at a time, so runs that finished before an
# interruption remain available in `experiment_outputs`.
experiment_outputs = []
depths = (3, 25, 100)  # NTK depths; they match the 'NTK D=...' column labels of df_rd / df_sd
for data in datasets:
    experiment_outputs.append(real_experiment(data, depths))


Forest Fires :
norm  = False
noise = False
depth = (3, 25, 100)


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


KeyboardInterrupt: 

In [None]:
# Copy each experiment's test scores into the summary tables: experiments
# with norm=True go to `df_sd`, all others to `df_rd`.
for exp in experiment_outputs:
    name = exp['dataset']['name']
    noise = exp['dataset']['noise']

    table = df_sd if exp['dataset']['norm'] else df_rd

    # Table column label -> {'rmse': ..., 'r2': ...} for that model.
    scores_by_column = {
        f'NTK D={depth}': exp[f'ntk_{depth}'] for depth in depths
    }
    scores_by_column['Laplace'] = exp['lap']
    scores_by_column['Gaussian'] = exp['gaus']

    for column, scores in scores_by_column.items():
        for metric in ('rmse', 'r2'):
            # A single .loc assignment writes into the table itself. The
            # chained form `table[column][row] = value` may assign to a
            # temporary copy and leave the table unchanged.
            table.loc[(name, metric, noise), column] = scores[metric]