In [1]:
from utils import *
from sklearn import metrics
import pandas as pd

# Experiment runner

In [2]:
def _fit_gp(kernel, data, alpha):
    """
    Fit a `GPR` model with `kernel` on `(data[0], data[1])` and predict `data[2]`.

    Prints the optimised kernel and returns `(fitted_gp, predicted_mean)`.
    """
    gp = GPR(kernel=kernel, alpha=alpha, normalize_y=True,
        n_restarts_optimizer=9, random_state=29834057)

    gp.fit(data[0], data[1])
    mean = gp.predict(data[2])
    print(gp.kernel_)

    return gp, mean


def _fitted_hyperparams(gp, noise, param_name):
    """
    Read `(constant_value, noise_level, <param_name>)` from a fitted model.

    The kernels here are built as `Constant + base` with an optional trailing
    `+ White`; when the white-noise term is present the first two components
    sit one level deeper, hence the extra `k1__` prefix. `noise_level` is
    `None` when `noise` is false.
    """
    params = gp.kernel_.get_params()
    prefix = 'k1__' if noise else ''

    const_val = params[f'{prefix}k1__constant_value']
    param_val = params[f'{prefix}k2__{param_name}']
    noise_lvl = params['k2__noise_level'] if noise else None

    return const_val, noise_lvl, param_val


def _rmse_and_r2(y_true, y_pred):
    """
    Score flattened predictions; returns a dictionary with `rmse` and `r2`.
    """
    # np.ravel accepts both pandas objects and arrays (Series.ravel is deprecated).
    y_true = np.ravel(y_true)
    y_pred = np.ravel(y_pred)

    return {
        # sqrt(MSE) rather than `squared=False`, which newer scikit-learn releases no longer accept.
        'rmse' : np.sqrt(metrics.mean_squared_error(y_true, y_pred)),
        'r2' : metrics.r2_score(y_true, y_pred)
    }


def real_experiment(data, depths, alpha=1e-5):
    """
    Fit one NTK model per entry of `depths`, plus a Laplace (Matern nu=1/2) and a
    Gaussian (Matern nu=inf) model, on the training split and score them on the
    test split.

    Data format := `[X_train, y_train, X_test, y_test, norm : bool, noise : bool, name : str]`

    `noise` adds a `WhiteKernel` term to every kernel. `alpha` is passed to each model.

    Outputs dictionary containing
    - `dataset` : `name`, `norm`, `noise` and `test` (`[X_test, y_test]`)
    - `means`   : tuple of flattened predictions, one per depth (in the order of
                  `depths`) followed by Laplace and Gaussian
    - `kernel`  : fitted hyperparameters under `ntk_{depth}` (`C`, `W`, `depth`, `bias`),
                  `lap` and `gaus` (`C`, `W`, `ell`); `W` is `None` without noise
    - `ntk_{depth}`, `lap`, `gaus` : `rmse` and `r2` on the test split
    """
    norm, noise, name = data[-3], data[-2], data[-1]

    print(f'\n{name} :\nnorm  = {norm}\nnoise = {noise}\ndepth = {depths}')

    # Created once so every model adds its entry instead of replacing the previous ones.
    exp_data = {'kernel' : {}}


    #########################
    # Neural tangent Kernel #
    #########################


    means_n = []
    for depth in depths:
        ntk = (
            ConstantKernel(constant_value=1) + 
            NTK(depth=depth, bias=0.1)
        )

        if noise:
            ntk += WhiteKernel(noise_level=0.1)

        gp_n, mean_n = _fit_gp(ntk, data, alpha)
        const_val_n, noise_lvl_n, bias = _fitted_hyperparams(gp_n, noise, 'bias')

        means_n.append(mean_n.ravel())

        exp_data['kernel'][f'ntk_{depth}'] = {
            'C' : const_val_n,
            'W' : noise_lvl_n,
            'depth' : depth,
            'bias' : bias
        }
        exp_data[f'ntk_{depth}'] = _rmse_and_r2(data[3], mean_n)


    #########################
    #   Lap + Gaus Kernel   #
    #########################


    lap = (
        ConstantKernel(constant_value=1) + 
        Matern(nu=1/2, length_scale=1)
    )

    gaus = (
        ConstantKernel(constant_value=1) + 
        Matern(nu=np.inf, length_scale=1)
    )

    if noise:
        lap += WhiteKernel(noise_level=0.1)
        gaus += WhiteKernel(noise_level=0.1)

    gp_l, mean_l = _fit_gp(lap, data, alpha)
    gp_g, mean_g = _fit_gp(gaus, data, alpha)


    #########################
    #         Data          #
    #########################


    const_val_l, noise_lvl_l, ell_l = _fitted_hyperparams(gp_l, noise, 'length_scale')
    const_val_g, noise_lvl_g, ell_g = _fitted_hyperparams(gp_g, noise, 'length_scale')

    exp_data['dataset'] = {
        'name' : name, 
        'norm' : norm,
        'noise': noise,
        'test' : [data[2], data[3]]
    }

    exp_data['means'] = (*means_n, mean_l.ravel(), mean_g.ravel())

    exp_data['kernel']['lap'] = {
        'C' : const_val_l,
        'W' : noise_lvl_l,
        'ell' : ell_l
    }
    exp_data['kernel']['gaus'] = {
        'C' : const_val_g,
        'W' : noise_lvl_g,
        'ell' : ell_g
    }

    exp_data['lap'] = _rmse_and_r2(data[3], mean_l)
    exp_data['gaus'] = _rmse_and_r2(data[3], mean_g)

    return exp_data

# Datasets

In [3]:
# Experiment configurations, filled by the cells below. Each entry is a list laid out as:
# [X_train, y_train, X_test, y_test, norm : bool, noise : bool, name : str]
datasets = []

In [4]:
concrete = pd.read_csv('./real_world_data/concrete.csv', header=0)
names_c = concrete.columns

# The last column is used as the regression target, every other column as a feature.
target_c = names_c[-1]
X = concrete.drop(columns=target_c)
y = concrete[target_c]

# Raw features first, then normalized ones; both splits use the same `random_state`.
# Each split is registered twice: without and with the noise flag.
for use_norm in (False, True):
    features = normalize(X, axis=1) if use_norm else X
    X_train, X_test, y_train, y_test = train_test_split(
        features, y, test_size=0.25, random_state=13450978)

    for use_noise in (False, True):
        datasets.append([X_train, y_train, X_test, y_test, use_norm, use_noise, 'Concrete'])

In [5]:
# The `month` and `day` columns are discarded right after loading.
forest_fires = (
    pd.read_csv('./real_world_data/forestfires.csv', header=0)
    .drop(columns=['month', 'day'])
)
names_f = forest_fires.columns

# `area` is the regression target, every remaining column is a feature.
X = forest_fires.drop(columns='area')
y = forest_fires['area']

# Raw features first, then normalized ones; both splits use the same `random_state`.
# Each split is registered twice: without and with the noise flag.
for use_norm in (False, True):
    features = normalize(X, axis=1) if use_norm else X
    X_train, X_test, y_train, y_test = train_test_split(
        features, y, test_size=0.25, random_state=879631245)

    for use_noise in (False, True):
        datasets.append([X_train, y_train, X_test, y_test, use_norm, use_noise, 'Forest Fires'])

# Results

In [6]:
# Row labels: every (dataset, metric, noise) combination.
arrays = [
    ["Concrete", "Forest Fires"],
    ['rmse', 'r2'],
    [False, True],  # Noise
]
index = pd.MultiIndex.from_product(arrays, names=['Dataset', 'Metric', 'Noise'])

# One column per fitted model.
result_columns = ['NTK D=3', 'NTK D=25', 'NTK D=100', 'Laplace', 'Gaussian']

# Two empty tables with the same layout, filled in once the experiments have run:
# `df_rd` receives the runs with norm == False, `df_sd` the runs with norm == True.
df_rd, df_sd = (
    pd.DataFrame(index=index, columns=result_columns) for _ in range(2)
)

df_rd

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,NTK D=3,NTK D=25,NTK D=100,Laplace,Gaussian
Dataset,Metric,Noise,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Concrete,rmse,False,,,,,
Concrete,rmse,True,,,,,
Concrete,r2,False,,,,,
Concrete,r2,True,,,,,
Forest Fires,rmse,False,,,,,
Forest Fires,rmse,True,,,,,
Forest Fires,r2,False,,,,,
Forest Fires,r2,True,,,,,


In [7]:
# NTK depths evaluated for every dataset configuration.
depths = (3, 25, 100)

# One result dictionary per entry of `datasets`, in the same order.
experiment_outputs = []
for data in datasets:
    result = real_experiment(data, depths)
    experiment_outputs.append(result)


Concrete :
norm  = False
noise = False
depth = (3, 25, 100)


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


0.274**2 + NTK(depth=3, c=2.000, bias=129.560)


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


0.00405**2 + NTK(depth=25, c=2.000, bias=384.429)


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


0.0148**2 + NTK(depth=100, c=2.000, bias=841.000)
0.0847**2 + Matern(length_scale=13.5, nu=0.5)


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


0.014**2 + Matern(length_scale=1e-05, nu=inf)

Concrete :
norm  = False
noise = True
depth = (3, 25, 100)
0.0266**2 + NTK(depth=3, c=2.000, bias=443.406) + WhiteKernel(noise_level=0.0426)


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


0.135**2 + NTK(depth=25, c=2.000, bias=1812.640) + WhiteKernel(noise_level=0.0399)


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


0.109**2 + NTK(depth=100, c=2.000, bias=30062.378) + WhiteKernel(noise_level=0.033)
0.291**2 + Matern(length_scale=311, nu=0.5) + WhiteKernel(noise_level=0.0453)
0.178**2 + Matern(length_scale=113, nu=inf) + WhiteKernel(noise_level=0.0872)

Concrete :
norm  = True
noise = False
depth = (3, 25, 100)


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


2.83**2 + NTK(depth=3, c=2.000, bias=0.000)


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


0.166**2 + NTK(depth=25, c=2.000, bias=0.000)


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


0.00957**2 + NTK(depth=100, c=2.000, bias=0.000)
0.124**2 + Matern(length_scale=0.0118, nu=0.5)
0.0217**2 + Matern(length_scale=3.91e-05, nu=inf)

Concrete :
norm  = True
noise = True
depth = (3, 25, 100)


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


0.95**2 + NTK(depth=3, c=2.000, bias=0.000) + WhiteKernel(noise_level=0.185)


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


0.00602**2 + NTK(depth=25, c=2.000, bias=0.000) + WhiteKernel(noise_level=0.0502)




0.00316**2 + NTK(depth=100, c=2.000, bias=0.000) + WhiteKernel(noise_level=0.033)
0.419**2 + Matern(length_scale=0.217, nu=0.5) + WhiteKernel(noise_level=0.0443)
0.132**2 + Matern(length_scale=0.0746, nu=inf) + WhiteKernel(noise_level=0.0888)

Forest Fires :
norm  = False
noise = False
depth = (3, 25, 100)


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


0.999**2 + NTK(depth=3, c=2.000, bias=44.993)


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


0.00316**2 + NTK(depth=25, c=2.000, bias=127.373)


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


0.00473**2 + NTK(depth=100, c=2.000, bias=252.201)




0.00316**2 + Matern(length_scale=2.03, nu=0.5)




0.00316**2 + Matern(length_scale=2.15, nu=inf)

Forest Fires :
norm  = False
noise = True
depth = (3, 25, 100)


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


0.605**2 + NTK(depth=3, c=2.000, bias=1258.889) + WhiteKernel(noise_level=0.986)


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


0.00316**2 + NTK(depth=25, c=2.000, bias=78850.373) + WhiteKernel(noise_level=0.972)


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


0.00782**2 + NTK(depth=100, c=2.000, bias=253.729) + WhiteKernel(noise_level=0.00379)




0.00316**2 + Matern(length_scale=2.04, nu=0.5) + WhiteKernel(noise_level=0.00382)




0.00316**2 + Matern(length_scale=2.15, nu=inf) + WhiteKernel(noise_level=0.00379)

Forest Fires :
norm  = True
noise = False
depth = (3, 25, 100)


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


0.00316**2 + NTK(depth=3, c=2.000, bias=0.001)


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


0.00316**2 + NTK(depth=25, c=2.000, bias=0.000)


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


0.00316**2 + NTK(depth=100, c=2.000, bias=0.000)




0.00316**2 + Matern(length_scale=1e-05, nu=0.5)




0.00316**2 + Matern(length_scale=1e-05, nu=inf)

Forest Fires :
norm  = True
noise = True
depth = (3, 25, 100)


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


0.274**2 + NTK(depth=3, c=2.000, bias=232.256) + WhiteKernel(noise_level=0.995)
0.274**2 + NTK(depth=25, c=2.000, bias=265.039) + WhiteKernel(noise_level=0.971)




0.00316**2 + NTK(depth=100, c=2.000, bias=0.083) + WhiteKernel(noise_level=0.878)




0.00316**2 + Matern(length_scale=0.00264, nu=0.5) + WhiteKernel(noise_level=0.00382)
0.00316**2 + Matern(length_scale=1e-05, nu=inf) + WhiteKernel(noise_level=0.00382)




In [13]:
# NOTE(review): `test` is not used by any other cell shown here; presumably this reloads
# the results written by `save_data(experiment_outputs, 'exp_real')` — confirm or remove.
test = load_data('exp_real')

8

In [8]:
# Copy every model's test scores into the result tables:
# runs on normalized inputs go to `df_sd`, all other runs go to `df_rd`.

# (key in the experiment output, column label in the tables)
model_to_column = [(f'ntk_{depth}', f'NTK D={depth}') for depth in depths]
model_to_column += [('lap', 'Laplace'), ('gaus', 'Gaussian')]

for exp in experiment_outputs:
    name = exp['dataset']['name']
    noise = exp['dataset']['noise']

    table = df_sd if exp['dataset']['norm'] else df_rd

    for model_key, column in model_to_column:
        for metric in ('rmse', 'r2'):
            # A single `.loc` write; chained `df[col][row] = value` is not
            # guaranteed to modify the frame itself.
            table.loc[(name, metric, noise), column] = exp[model_key][metric]

In [9]:
# save_data(experiment_outputs, 'exp_real')

In [11]:
# Test scores of the runs whose dataset entry has norm == False.
df_rd

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,NTK D=3,NTK D=25,NTK D=100,Laplace,Gaussian
Dataset,Metric,Noise,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Concrete,rmse,False,5.04729,5.201556,5.900334,11.448858,15.485845
Concrete,rmse,True,5.421162,5.555517,6.072096,5.508261,5.761243
Concrete,r2,False,0.900508,0.894333,0.864036,0.488086,0.063426
Concrete,r2,True,0.885222,0.879463,0.856004,0.881505,0.87037
Forest Fires,rmse,False,38.78891,37.720642,34.160519,16.423256,17.255082
Forest Fires,rmse,True,18.332155,22.107979,34.137496,16.421816,17.247906
Forest Fires,r2,False,-6.786299,-6.363327,-5.038997,-0.395836,-0.540813
Forest Fires,r2,True,-0.739174,-1.52938,-5.03086,-0.395591,-0.539532


In [12]:
# Test scores of the runs whose dataset entry has norm == True.
df_sd

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,NTK D=3,NTK D=25,NTK D=100,Laplace,Gaussian
Dataset,Metric,Noise,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Concrete,rmse,False,5.034478,5.194905,5.89325,10.603514,15.480197
Concrete,rmse,True,7.250604,5.70731,6.069782,5.542235,5.755933
Concrete,r2,False,0.901012,0.894603,0.864362,0.560891,0.064109
Concrete,r2,True,0.794685,0.872786,0.856114,0.880038,0.870609
Forest Fires,rmse,False,38.51829,37.494737,34.006587,16.149912,16.149912
Forest Fires,rmse,True,17.887548,22.108632,25.115341,16.768897,16.149721
Forest Fires,r2,False,-6.678032,-6.275395,-4.984695,-0.349759,-0.349759
Forest Fires,r2,True,-0.655837,-1.529529,-2.26433,-0.455207,-0.349727
