In [2]:
import sys
sys.path.append('../..')
sys.path.append('../data')
sys.path.append('../../helper_code')

from sklearn.linear_model import Ridge
from sklearn.kernel_ridge import KernelRidge
from sklearn.model_selection import cross_val_score, KFold, GridSearchCV, train_test_split
from sklearn.metrics import mean_squared_error
import warnings
import numpy as np
import pandas as pd
from IPython.display import display
import copy

from helper_code.custom_kernel import *

## Load Data ##

In [3]:
# Read each benzene training table into dataset_dict, keyed by its dataset label.
# NOTE(review): X_lexi, X_lexi_nd, X_sorted, X_coulomb and y_delta_energy, which the
# grid-search cells fit on, are not assigned in any visible cell -- confirm where they
# are built (possibly via the star import) before relying on Restart & Run All.
input_dataset = ['c', 'c_lexi', 'CE', 'CE_lexi', 'CSE', 'CSE_lexi']

dataset_dict = {}
for data in input_dataset:
    dataset_dict[data] = pd.read_csv(
        f'../data/benzene_training_data/[Benz]_{data}.csv'
    )

# The DD_e_tot (kcal) table lives in its own CSV and is loaded separately.
delta_delta_total_energy = pd.read_csv(
    '../data/benzene_training_data/DD_e_tot (kcal).csv'
)

## Polynomial Kernel ##

### Poly_lexi_delta_tot ###

In [None]:
# Polynomial KRR on X_lexi -- small alpha, small coef0.
# Log-spaced grid: alpha in [1e-7, 1e-3], coef0 in [1e-5, 1e-1], degree 2-4.
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)
poly_KRR = KernelRidge()
param_grid = dict(
    alpha=np.logspace(np.log10(1e-7), np.log10(1e-3), num=40),
    kernel=['poly'],
    degree=[2, 3, 4],
    coef0=np.logspace(np.log10(1e-5), np.log10(1e-1), num=40),
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=poly_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
# Polynomial KRR on X_lexi -- small alpha, large coef0.
# Log-spaced grid: alpha in [1e-7, 1e-3], coef0 in [1e-1, 1e3], degree 2-4.
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)
poly_KRR = KernelRidge()
param_grid = dict(
    alpha=np.logspace(np.log10(1e-7), np.log10(1e-3), num=40),
    kernel=['poly'],
    degree=[2, 3, 4],
    coef0=np.logspace(np.log10(1e-1), np.log10(1e3), num=40),
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=poly_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
# Polynomial KRR on X_lexi -- large alpha, small coef0.
# Log-spaced grid: alpha in [1e-3, 1e1], coef0 in [1e-5, 1e-1], degree 2-4.
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)
poly_KRR = KernelRidge()
param_grid = dict(
    alpha=np.logspace(np.log10(1e-3), np.log10(1e1), num=40),
    kernel=['poly'],
    degree=[2, 3, 4],
    coef0=np.logspace(np.log10(1e-5), np.log10(1e-1), num=40),
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=poly_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
# Polynomial KRR on X_lexi -- large alpha, large coef0.
# Log-spaced grid: alpha in [1e-3, 1e1], coef0 in [1e-1, 1e3], degree 2-4.
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)
poly_KRR = KernelRidge()
param_grid = dict(
    alpha=np.logspace(np.log10(1e-3), np.log10(1e1), num=40),
    kernel=['poly'],
    degree=[2, 3, 4],
    coef0=np.logspace(np.log10(1e-1), np.log10(1e3), num=40),
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=poly_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
# Polynomial KRR (degree 2) on X_lexi.
# Linear grid: alpha in [1e-8, 1e-6], coef0 in [1e-6, 1e-4].
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)
poly_KRR = KernelRidge()
param_grid = dict(
    alpha=np.linspace(1e-8, 1e-6, num=40),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(1e-6, 1e-4, num=40),
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=poly_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
# Polynomial KRR (degree 2) on X_lexi.
# Linear grid: alpha in [1e-10, 1e-8], coef0 in [1e-8, 1e-6].
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)
poly_KRR = KernelRidge()
param_grid = dict(
    alpha=np.linspace(1e-10, 1e-8, num=40),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(1e-8, 1e-6, num=40),
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=poly_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
# Polynomial KRR (degree 2) on X_lexi.
# Linear grid: alpha in [1e-10, 1e-8], coef0 in [1e-6, 1e-4].
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)
poly_KRR = KernelRidge()
param_grid = dict(
    alpha=np.linspace(1e-10, 1e-8, num=40),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(1e-6, 1e-4, num=40),
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=poly_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
# Polynomial KRR (degree 2) on X_lexi.
# Linear grid: alpha in [1e-8, 1e-6], coef0 in [1e-8, 1e-6].
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)
poly_KRR = KernelRidge()
param_grid = dict(
    alpha=np.linspace(1e-8, 1e-6, num=40),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(1e-8, 1e-6, num=40),
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=poly_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
# Polynomial KRR (degree 2) on X_lexi.
# Linear grid: alpha in [1e-12, 1e-10] (40 pts), coef0 in [1e-8, 1e-4] (80 pts).
# NOTE(review): the coef0 grid is linear over four decades (step ~1.3e-6), so the
# range below ~1e-6 holds a single point -- consider np.geomspace if it matters.
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)
poly_KRR = KernelRidge()
param_grid = dict(
    alpha=np.linspace(1e-12, 1e-10, num=40),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(1e-8, 1e-4, num=80),
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=poly_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
# Polynomial KRR (degree 2) on X_lexi.
# Linear grid: alpha in [1e-14, 1e-12] (80 pts), coef0 in [1e-8, 1e-2] (120 pts).
# NOTE(review): the coef0 grid is linear over six decades (step ~8.4e-5), so nothing
# between 1e-8 and ~8e-5 is sampled -- consider np.geomspace if the low end matters.
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)
poly_KRR = KernelRidge()
param_grid = dict(
    alpha=np.linspace(1e-14, 1e-12, num=80),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(1e-8, 1e-2, num=120),
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=poly_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
# Polynomial KRR (degree 2) on X_lexi.
# Linear grid: alpha in [1e-13, 1e-12], coef0 in [0.001, 0.01].
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)
poly_KRR = KernelRidge()
param_grid = dict(
    alpha=np.linspace(1e-13, 1e-12, num=50),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(0.001, 0.01, num=50),
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=poly_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
# Polynomial KRR (degree 2) on X_lexi.
# Linear grid: alpha in [2e-13, 5e-13] (50 pts), coef0 in [0.001, 0.01] (100 pts).
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)
poly_KRR = KernelRidge()
param_grid = dict(
    alpha=np.linspace(2e-13, 5e-13, num=50),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(0.001, 0.01, num=100),
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=poly_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
# Polynomial KRR (degree 2) on X_lexi.
# Linear grid: alpha in [3e-13, 4e-13], coef0 in [0.002, 0.003].
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)
poly_KRR = KernelRidge()
param_grid = dict(
    alpha=np.linspace(3e-13, 4e-13, num=50),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(0.002, 0.003, num=50),
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=poly_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

### Poly_lexi_nd_delta_tot ###

In [None]:
# Polynomial KRR on X_lexi_nd.
# Log-spaced grid: alpha in [1e-7, 1e-3], coef0 in [1e-5, 1e-1], degree 2-4.
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)
poly_KRR = KernelRidge()
param_grid = dict(
    alpha=np.logspace(np.log10(1e-7), np.log10(1e-3), num=50),
    kernel=['poly'],
    degree=[2, 3, 4],
    coef0=np.logspace(np.log10(1e-5), np.log10(1e-1), num=50),
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=poly_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi_nd, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
# Polynomial KRR (degree 2) on X_lexi_nd.
# Linear grid (51 pts each, endpoints included): alpha in [1e-9, 1e-7], coef0 in [1e-7, 1e-5].
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)
poly_KRR = KernelRidge()
param_grid = dict(
    alpha=np.linspace(1e-9, 1e-7, num=51, endpoint=True),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(1e-7, 1e-5, num=51, endpoint=True),
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=poly_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi_nd, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
# Polynomial KRR (degree 2) on X_lexi_nd.
# Linear grid (51 pts each, endpoints included): alpha in [1e-11, 1e-9], coef0 in [1e-6, 2e-6].
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)
poly_KRR = KernelRidge()
param_grid = dict(
    alpha=np.linspace(1e-11, 1e-9, num=51, endpoint=True),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(1e-6, 2e-6, num=51, endpoint=True),
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=poly_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi_nd, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
# Polynomial KRR (degree 2) on X_lexi_nd.
# Linear grid (51 pts each, endpoints included): alpha in [1e-13, 1e-11], coef0 in [1e-7, 1e-5].
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)
poly_KRR = KernelRidge()
param_grid = dict(
    alpha=np.linspace(1e-13, 1e-11, num=51, endpoint=True),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(1e-7, 1e-5, num=51, endpoint=True),
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=poly_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi_nd, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
# Polynomial KRR (degree 2) on X_lexi_nd.
# Linear grid (51 pts each, endpoints included): alpha in [1e-15, 1e-13], coef0 in [1e-7, 1e-5].
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)
poly_KRR = KernelRidge()
param_grid = dict(
    alpha=np.linspace(1e-15, 1e-13, num=51, endpoint=True),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(1e-7, 1e-5, num=51, endpoint=True),
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=poly_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi_nd, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
# Polynomial KRR (degree 2) on X_lexi_nd.
# Linear grid (51 pts each, endpoints included): alpha in [1e-17, 1e-15], coef0 in [1e-6, 1e-5].
# NOTE(review): these alpha values sit around float64 epsilon (~2.2e-16); presumably
# they barely change the regularised system -- confirm the search is still meaningful.
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)
poly_KRR = KernelRidge()
param_grid = dict(
    alpha=np.linspace(1e-17, 1e-15, num=51, endpoint=True),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(1e-6, 1e-5, num=51, endpoint=True),
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=poly_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi_nd, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

### Poly_sorted_delta_tot ###

In [None]:
# Polynomial KRR (degree 2) on X_sorted.
# Linear grid: alpha in [1e-8, 1e-6], coef0 in [1e-6, 1e-4].
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)
poly_KRR = KernelRidge()
param_grid = dict(
    alpha=np.linspace(1e-8, 1e-6, num=40),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(1e-6, 1e-4, num=40),
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=poly_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_sorted, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
# Polynomial KRR (degree 2) on X_sorted.
# Linear grid: alpha in [1e-10, 1e-8], coef0 in [1e-6, 1e-4].
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)
poly_KRR = KernelRidge()
param_grid = dict(
    alpha=np.linspace(1e-10, 1e-8, num=40),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(1e-6, 1e-4, num=40),
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=poly_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_sorted, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
# Polynomial KRR (degree 2) on X_sorted.
# Linear grid: alpha in [1e-12, 1e-10], coef0 in [1e-6, 1e-4].
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)
poly_KRR = KernelRidge()
param_grid = dict(
    alpha=np.linspace(1e-12, 1e-10, num=40),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(1e-6, 1e-4, num=40),
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=poly_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_sorted, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
# Polynomial KRR (degree 2) on X_sorted.
# Linear grid: alpha in [1e-14, 1e-12], coef0 in [1e-6, 1e-4].
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)
poly_KRR = KernelRidge()
param_grid = dict(
    alpha=np.linspace(1e-14, 1e-12, num=40),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(1e-6, 1e-4, num=40),
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=poly_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_sorted, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
# Polynomial KRR (degree 2) on X_sorted.
# Linear grid: alpha in [6e-13, 9e-13], coef0 in [3e-5, 5e-5].
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)
poly_KRR = KernelRidge()
param_grid = dict(
    alpha=np.linspace(6e-13, 9e-13, num=40),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(3e-5, 5e-5, num=40),
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=poly_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_sorted, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

### Poly_coulomb_delta_tot ###

In [None]:
# Polynomial KRR (degree 2) on X_coulomb.
# Log-spaced grid: alpha in [1e-7, 1e-3], coef0 in [1e-5, 1e-1].
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)
poly_KRR = KernelRidge()
param_grid = dict(
    alpha=np.logspace(np.log10(1e-7), np.log10(1e-3), num=50),
    kernel=['poly'],
    degree=[2],
    coef0=np.logspace(np.log10(1e-5), np.log10(1e-1), num=50),
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=poly_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_coulomb, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
# Polynomial KRR (degree 2) on X_coulomb.
# Linear grid: alpha in [0.001, 0.1], coef0 in [0.001, 0.01].
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)
poly_KRR = KernelRidge()
param_grid = dict(
    alpha=np.linspace(0.001, 0.1, num=50),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(0.001, 0.01, num=50),
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=poly_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_coulomb, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
# Polynomial KRR (degree 2) on X_coulomb.
# Linear grid: alpha in [0.1, 1], coef0 in [0.01, 0.1].
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)
poly_KRR = KernelRidge()
param_grid = dict(
    alpha=np.linspace(0.1, 1, num=50),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(0.01, 0.1, num=50),
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=poly_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_coulomb, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
# Polynomial KRR (degree 2) on X_coulomb.
# Linear grid: alpha in [0.1, 0.2], coef0 in [0.1, 1].
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)
poly_KRR = KernelRidge()
param_grid = dict(
    alpha=np.linspace(0.1, 0.2, num=50),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(0.1, 1, num=50),
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=poly_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_coulomb, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
# Polynomial KRR (degree 2) on X_coulomb.
# Linear grid: alpha in [0.1, 0.2], coef0 in [1, 10].
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)
poly_KRR = KernelRidge()
param_grid = dict(
    alpha=np.linspace(0.1, 0.2, num=50),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(1, 10, num=50),
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=poly_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_coulomb, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

## Gaussian Kernel ##

### Gaussian_lexi_delta_tot ###

In [None]:
# RBF-kernel KRR on X_lexi.
# Linear grid: alpha in [1e-15, 1e-13], gamma in [1e-7, 1e-5].
# NOTE(review): alpha here approaches float64 epsilon (~2.2e-16); for an RBF kernel
# the diagonal is presumably 1, so such an alpha may be quantised away -- confirm.
k_fold = KFold(n_splits=5, shuffle=True, random_state=42)
gaussian_KRR = KernelRidge()
param_grid = dict(
    alpha=np.linspace(1e-15, 1e-13, num=50),  # L2 regularisation strength
    gamma=np.linspace(1e-7, 1e-5, num=50),  # width parameter of the Gaussian kernel
    kernel=['rbf'],  # Gaussian (RBF) kernel
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=gaussian_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

# Report the winning grid point and its score.
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

In [None]:
# RBF-kernel KRR on X_lexi.
# Linear grid: alpha in [1e-17, 1e-15], gamma in [2e-7, 4e-7].
# NOTE(review): most of this alpha range lies below float64 epsilon (~2.2e-16); for an
# RBF kernel the diagonal is presumably 1, so alpha may have no effect -- confirm.
k_fold = KFold(n_splits=5, shuffle=True, random_state=42)
gaussian_KRR = KernelRidge()
param_grid = dict(
    alpha=np.linspace(1e-17, 1e-15, num=50),  # L2 regularisation strength
    gamma=np.linspace(2e-7, 4e-7, num=50),  # width parameter of the Gaussian kernel
    kernel=['rbf'],  # Gaussian (RBF) kernel
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=gaussian_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

# Report the winning grid point and its score.
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

### Gaussian_lexi_nd_delta_tot ###

In [None]:
# RBF-kernel KRR on X_lexi_nd -- small alpha, small gamma.
# Log-spaced grid: alpha in [1e-9, 1e-5], gamma in [1e-7, 1e-3].
k_fold = KFold(n_splits=5, shuffle=True, random_state=42)
gaussian_KRR = KernelRidge()
param_grid = dict(
    alpha=np.logspace(np.log10(1e-9), np.log10(1e-5), num=50),  # L2 regularisation strength
    gamma=np.logspace(np.log10(1e-7), np.log10(1e-3), num=50),  # width parameter of the Gaussian kernel
    kernel=['rbf'],  # Gaussian (RBF) kernel
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=gaussian_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi_nd, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

# Report the winning grid point and its score.
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

In [None]:
# RBF-kernel KRR on X_lexi_nd.
# Linear grid (51 pts each, endpoints included): alpha in [1e-9, 1e-7], gamma in [1e-7, 1e-5].
k_fold = KFold(n_splits=5, shuffle=True, random_state=42)
gaussian_KRR = KernelRidge()
param_grid = dict(
    alpha=np.linspace(1e-9, 1e-7, num=51, endpoint=True),  # L2 regularisation strength
    gamma=np.linspace(1e-7, 1e-5, num=51, endpoint=True),  # width parameter of the Gaussian kernel
    kernel=['rbf'],  # Gaussian (RBF) kernel
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=gaussian_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi_nd, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

# Report the winning grid point and its score.
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

In [None]:
# RBF-kernel KRR on X_lexi_nd.
# Linear grid: alpha in [1e-8, 1e-7], gamma in [1e-6, 5e-6].
k_fold = KFold(n_splits=5, shuffle=True, random_state=42)
gaussian_KRR = KernelRidge()
param_grid = dict(
    alpha=np.linspace(1e-8, 1e-7, num=50),  # L2 regularisation strength
    gamma=np.linspace(1e-6, 5e-6, num=50),  # width parameter of the Gaussian kernel
    kernel=['rbf'],  # Gaussian (RBF) kernel
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=gaussian_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi_nd, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

# Report the winning grid point and its score.
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

### Gaussian_sorted_delta_tot ###

In [None]:
# RBF-kernel KRR on X_sorted -- small alpha, small gamma.
# Log-spaced grid: alpha in [1e-9, 1e-5], gamma in [1e-7, 1e-3].
k_fold = KFold(n_splits=5, shuffle=True, random_state=42)
gaussian_KRR = KernelRidge()
param_grid = dict(
    alpha=np.logspace(np.log10(1e-9), np.log10(1e-5), num=50),  # L2 regularisation strength
    gamma=np.logspace(np.log10(1e-7), np.log10(1e-3), num=50),  # width parameter of the Gaussian kernel
    kernel=['rbf'],  # Gaussian (RBF) kernel
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=gaussian_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_sorted, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

# Report the winning grid point and its score.
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

In [3]:
# RBF-kernel KRR on X_sorted.
# Linear grid: alpha in [1e-5, 1e-3], gamma in [1e-6, 1e-4].
# The recorded output picks the lowest alpha and the highest gamma of this grid,
# i.e. the optimum lies on the grid boundary.
k_fold = KFold(n_splits=5, shuffle=True, random_state=42)
gaussian_KRR = KernelRidge()
param_grid = dict(
    alpha=np.linspace(1e-5, 1e-3, num=50),  # L2 regularisation strength
    gamma=np.linspace(1e-6, 1e-4, num=50),  # width parameter of the Gaussian kernel
    kernel=['rbf'],  # Gaussian (RBF) kernel
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=gaussian_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_sorted, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

# Report the winning grid point and its score.
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

Best Hyperparameters: {'alpha': 1e-05, 'gamma': 0.0001, 'kernel': 'rbf'}
Best Root Mean Squared Error: 0.1332767624303391


In [4]:
# RBF-kernel KRR on X_sorted.
# Linear grid: alpha in [1e-7, 1e-5], gamma in [1e-5, 1e-3].
k_fold = KFold(n_splits=5, shuffle=True, random_state=42)
gaussian_KRR = KernelRidge()
param_grid = dict(
    alpha=np.linspace(1e-7, 1e-5, num=50),  # L2 regularisation strength
    gamma=np.linspace(1e-5, 1e-3, num=50),  # width parameter of the Gaussian kernel
    kernel=['rbf'],  # Gaussian (RBF) kernel
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=gaussian_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_sorted, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

# Report the winning grid point and its score.
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

Best Hyperparameters: {'alpha': 1e-07, 'gamma': 0.0008787755102040817, 'kernel': 'rbf'}
Best Root Mean Squared Error: 0.13327457653034555


In [5]:
# RBF-kernel KRR on X_sorted.
# Linear grid: alpha in [1e-15, 1e-13], gamma in [0.0006, 0.001].
# NOTE(review): alpha here approaches float64 epsilon (~2.2e-16); for an RBF kernel
# the diagonal is presumably 1, so such an alpha may be quantised away -- confirm.
k_fold = KFold(n_splits=5, shuffle=True, random_state=42)
gaussian_KRR = KernelRidge()
param_grid = dict(
    alpha=np.linspace(1e-15, 1e-13, num=50),  # L2 regularisation strength
    gamma=np.linspace(0.0006, 0.001, num=50),  # width parameter of the Gaussian kernel
    kernel=['rbf'],  # Gaussian (RBF) kernel
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=gaussian_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_sorted, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

# Report the winning grid point and its score.
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

Best Hyperparameters: {'alpha': 1e-15, 'gamma': 0.0007306122448979592, 'kernel': 'rbf'}
Best Root Mean Squared Error: 0.12216791615977746


In [6]:
# RBF-kernel KRR on X_sorted.
# Linear grid: alpha in [1e-17, 1e-15], gamma in [0.0007, 0.009].
# NOTE(review): most of this alpha range lies below float64 epsilon (~2.2e-16); for an
# RBF kernel the diagonal is presumably 1, so alpha may have no effect -- confirm.
k_fold = KFold(n_splits=5, shuffle=True, random_state=42)
gaussian_KRR = KernelRidge()
param_grid = dict(
    alpha=np.linspace(1e-17, 1e-15, num=50),  # L2 regularisation strength
    gamma=np.linspace(0.0007, 0.009, num=50),  # width parameter of the Gaussian kernel
    kernel=['rbf'],  # Gaussian (RBF) kernel
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=gaussian_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_sorted, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

# Report the winning grid point and its score.
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

Best Hyperparameters: {'alpha': 5.555102040816327e-16, 'gamma': 0.003579591836734694, 'kernel': 'rbf'}
Best Root Mean Squared Error: 0.11916077001687622


In [7]:
# RBF-kernel KRR on X_sorted.
# Linear grid: alpha in [3e-16, 7e-16], gamma in [0.001, 0.006].
# NOTE(review): alpha is only a few float64 epsilons (~2.2e-16) wide here, so
# neighbouring grid values may be indistinguishable once added to the kernel -- confirm.
k_fold = KFold(n_splits=5, shuffle=True, random_state=42)
gaussian_KRR = KernelRidge()
param_grid = dict(
    alpha=np.linspace(3e-16, 7e-16, num=50),  # L2 regularisation strength
    gamma=np.linspace(0.001, 0.006, num=50),  # width parameter of the Gaussian kernel
    kernel=['rbf'],  # Gaussian (RBF) kernel
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=gaussian_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_sorted, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

# Report the winning grid point and its score.
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

Best Hyperparameters: {'alpha': 3.4081632653061223e-16, 'gamma': 0.0028367346938775514, 'kernel': 'rbf'}
Best Root Mean Squared Error: 0.11787818055224993


In [9]:
# RBF-kernel KRR on X_sorted.
# Linear grid: alpha in [3e-16, 4e-16], gamma in [0.0025, 0.0035].
# NOTE(review): the whole alpha range is narrower than one float64 epsilon
# (~2.2e-16); the alpha axis may not be resolvable at all -- confirm.
k_fold = KFold(n_splits=5, shuffle=True, random_state=42)
gaussian_KRR = KernelRidge()
param_grid = dict(
    alpha=np.linspace(3e-16, 4e-16, num=50),  # L2 regularisation strength
    gamma=np.linspace(0.0025, 0.0035, num=50),  # width parameter of the Gaussian kernel
    kernel=['rbf'],  # Gaussian (RBF) kernel
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=gaussian_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_sorted, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

# Report the winning grid point and its score.
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

Best Hyperparameters: {'alpha': 3e-16, 'gamma': 0.0032346938775510204, 'kernel': 'rbf'}
Best Root Mean Squared Error: 0.1098240556228957


In [11]:
# RBF-kernel KRR on X_sorted with gamma pinned to a single value; only alpha
# (linear grid in [2e-16, 3e-16]) is searched.
# NOTE(review): the recorded RMSE (0.1098240556228957) is identical to the one the
# preceding search reported at alpha=3e-16, which suggests alpha no longer changes the
# fit at this scale (it is about one float64 epsilon) -- confirm before refining further.
k_fold = KFold(n_splits=5, shuffle=True, random_state=42)
gaussian_KRR = KernelRidge()
param_grid = dict(
    alpha=np.linspace(2e-16, 3e-16, num=50),  # L2 regularisation strength
    gamma=[0.0032346938775510204],  # fixed width parameter of the Gaussian kernel
    kernel=['rbf'],  # Gaussian (RBF) kernel
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence every warning raised during the search
    grid_search = GridSearchCV(estimator=gaussian_KRR, param_grid=param_grid,
                               scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_sorted, y_delta_energy)

# The scorer is a negated MSE, so flip the sign before taking the root.
best_params, best_score = grid_search.best_params_, np.sqrt(-grid_search.best_score_)

# Report the winning grid point and its score.
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

Best Hyperparameters: {'alpha': 2e-16, 'gamma': 0.0032346938775510204, 'kernel': 'rbf'}
Best Root Mean Squared Error: 0.1098240556228957


### Gaussian_coulombic_delta_tot ###

In [None]:
# Coarse pass on X_coulomb: 50 x 50 log-spaced grid over alpha and gamma for an RBF kernel ridge model.
param_grid = {
    'alpha': np.logspace(np.log10(1e-9), np.log10(1e-5), num=50),  # ridge (L2) penalty
    'gamma': np.logspace(np.log10(1e-7), np.log10(1e-3), num=50),  # RBF kernel coefficient
    'kernel': ['rbf'],
}

gaussian_KRR = KernelRidge()
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

# Exhaustive 5-fold search; every warning raised while fitting is silenced inside this block.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search = GridSearchCV(
        estimator=gaussian_KRR,
        param_grid=param_grid,
        scoring='neg_mean_squared_error',
        cv=k_fold,
    ).fit(X_coulomb, y_delta_energy)

best_params = grid_search.best_params_
# best_score_ is the negated MSE averaged over the folds, so this is sqrt(mean MSE).
best_score = np.sqrt(-grid_search.best_score_)

# Report the winning setting and its score
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

In [None]:
# Refinement pass on X_coulomb: 50 x 50 linear grid over alpha and gamma for an RBF kernel ridge model.
param_grid = {
    'alpha': np.linspace(1e-10, 1e-9, num=50),  # Regularization parameter controlling the L2 regularization term
    'gamma': np.linspace(5e-5, 7e-5, num=50),  # RBF kernel coefficient; bounds now given low -> high (same interval as before)
    'kernel': ['rbf'],  # Specifies the kernel function to be used, in this case, the Gaussian (RBF) kernel
}

gaussian_KRR = KernelRidge()

# Use the same seed as the other two searches in this section (random_state=10) so that every
# pass is scored on identical folds and the reported RMSE values are directly comparable.
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

# Exhaustive 5-fold search; every warning raised while fitting is silenced inside this block.
with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(gaussian_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_coulomb, y_delta_energy)

best_params = grid_search.best_params_
# best_score_ is the negated MSE averaged over the folds, so this is sqrt(mean MSE).
best_score = np.sqrt(-grid_search.best_score_)

# Print the best hyperparameters and score
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

In [None]:
# Second refinement on X_coulomb: linear grids, with gamma scanned over a smaller range (1e-7 .. 1e-5).
param_grid = {
    'alpha': np.linspace(1e-10, 1e-9, num=50),  # ridge (L2) penalty
    'gamma': np.linspace(1e-7, 1e-5, num=50),   # RBF kernel coefficient
    'kernel': ['rbf'],
}

gaussian_KRR = KernelRidge()
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

# Exhaustive 5-fold search; every warning raised while fitting is silenced inside this block.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search = GridSearchCV(
        estimator=gaussian_KRR,
        param_grid=param_grid,
        scoring='neg_mean_squared_error',
        cv=k_fold,
    ).fit(X_coulomb, y_delta_energy)

best_params = grid_search.best_params_
# best_score_ is the negated MSE averaged over the folds, so this is sqrt(mean MSE).
best_score = np.sqrt(-grid_search.best_score_)

# Report the winning setting and its score
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

## Linear ##

### Linear_lexi_nd_delta ###

In [None]:
# Coarse pass for plain ridge regression on X_lexi_nd: 360 log-spaced alphas from 1e-7 to 1e2.
param_grid = {
    'alpha': np.logspace(np.log10(1e-7), np.log10(1e2), num=360),  # regularization strength
}

linear_model = Ridge()
k_fold = KFold(n_splits=5, shuffle=True, random_state=42)

# Exhaustive 5-fold search; every warning raised while fitting is silenced inside this block.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search = GridSearchCV(
        estimator=linear_model,
        param_grid=param_grid,
        scoring='neg_mean_squared_error',
        cv=k_fold,
    ).fit(X_lexi_nd, y_delta_energy)

best_params = grid_search.best_params_
# best_score_ is the negated MSE averaged over the folds, so this is sqrt(mean MSE).
best_score = np.sqrt(-grid_search.best_score_)

# Report the winning setting and its score
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

In [None]:
# Refinement pass for plain ridge regression on X_lexi_nd: 100 linearly spaced alphas in [0.02, 0.05].
param_grid = {
    'alpha': np.linspace(0.02, 0.05, num=100),  # regularization strength
}

linear_model = Ridge()
k_fold = KFold(n_splits=5, shuffle=True, random_state=42)

# Exhaustive 5-fold search; every warning raised while fitting is silenced inside this block.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search = GridSearchCV(
        estimator=linear_model,
        param_grid=param_grid,
        scoring='neg_mean_squared_error',
        cv=k_fold,
    ).fit(X_lexi_nd, y_delta_energy)

best_params = grid_search.best_params_
# best_score_ is the negated MSE averaged over the folds, so this is sqrt(mean MSE).
best_score = np.sqrt(-grid_search.best_score_)

# Report the winning setting and its score
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

## Extended Gaussian Kernel ##

In [7]:
from itertools import product

def generate_parameter_combinations(param_grid):
    """
    Expand a parameter grid into every concrete parameter setting.

    The Cartesian product of the value sequences is taken in the grid's key
    order, so the last key varies fastest in the returned list.

    Parameters:
        param_grid (dict): Maps each parameter name to an iterable of candidate values.

    Returns:
        list: One dictionary per combination, mapping every parameter name to a
              single chosen value. An empty grid yields a list with one empty dict.
    """
    names = tuple(param_grid.keys())
    settings = []
    for chosen_values in product(*param_grid.values()):
        # Pair each name with the value picked for it in this combination
        settings.append({name: value for name, value in zip(names, chosen_values)})
    return settings

In [None]:
# Convert the pandas DataFrame and Series to numpy arrays
X_train = X.to_numpy()
y_train = y_delta_energy.to_numpy()

# Hyperparameter grid: gamma / epsilon / beta are forwarded to extended_gaussian_kernel,
# alpha is the KernelRidge regularization strength.
param_grid = {
    'gamma': [1e-7, 5e-7, 1e-6, 5e-6, 1e-5, 5e-5, 1e-4],
    'epsilon': [1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1, 1],
    'beta': [1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1, 1],
    'alpha': [1e-9, 5e-9, 1e-8, 5e-8, 1e-7, 5e-7, 1e-6]
}

#########################################################################################################

# Exhaustive search. The similarity matrix depends only on the kernel parameters, so it is
# built once per (gamma, epsilon, beta) setting and reused for every alpha instead of being
# rebuilt for each full combination. Alpha still varies fastest and the comparison is a strict
# '<', so ties are resolved exactly as in a flat loop over the full grid.
# NOTE(review): assumes create_similarity_matrix is deterministic for given arguments - confirm.
alpha_values = param_grid['alpha']
kernel_grid = {name: values for name, values in param_grid.items() if name != 'alpha'}

best_mean_score = np.inf
best_params = None

# An integer random_state makes KFold yield the same shuffled folds on every split() call,
# so a single splitter can be shared by all evaluations.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

for kernel_params in generate_parameter_combinations(kernel_grid):
    similarity_matrix = create_similarity_matrix(X_train, X_train, extended_gaussian_kernel, kernel_params)

    for alpha in alpha_values:
        krr_model = KernelRidge(kernel='precomputed', alpha=alpha)
        mse_scores = cross_val_score(krr_model, similarity_matrix, y_train, scoring='neg_mean_squared_error', cv=kf)
        avg_rmse = np.sqrt(-mse_scores).mean()  # mean of the per-fold RMSE values

        if avg_rmse < best_mean_score:
            best_mean_score = avg_rmse
            # Fresh dict per candidate; key order (kernel params, then alpha) matches param_grid
            best_params = {**kernel_params, 'alpha': alpha}

print(f"Best params: {best_params}")
print(f"Best score: {best_mean_score}")


In [None]:
# Hyperparameter grid: gamma / epsilon / beta are forwarded to extended_gaussian_kernel,
# alpha is the KernelRidge regularization strength (a single value here).
# X_train and y_train are not assigned in this cell; they must already exist in the kernel
# (the preceding cell assigns them).
param_grid = {
    'gamma': np.linspace(1e-10, 1e-9, num=10),
    'epsilon': np.linspace(4e-4, 6e-4, num=10),
    'beta': np.linspace(1e-7, 5e-7, num=10),
    'alpha': [1e-15]
}

#########################################################################################################

# Exhaustive search. The similarity matrix depends only on the kernel parameters, so it is
# built once per (gamma, epsilon, beta) setting and reused for every alpha. Alpha still varies
# fastest and the comparison is a strict '<', so ties are resolved exactly as in a flat loop
# over the full grid.
# NOTE(review): assumes create_similarity_matrix is deterministic for given arguments - confirm.
alpha_values = param_grid['alpha']
kernel_grid = {name: values for name, values in param_grid.items() if name != 'alpha'}

best_mean_score = np.inf
best_param = None

# An integer random_state makes KFold yield the same shuffled folds on every split() call,
# so a single splitter can be shared by all evaluations.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

for kernel_params in generate_parameter_combinations(kernel_grid):
    similarity_matrix = create_similarity_matrix(X_train, X_train, extended_gaussian_kernel, kernel_params)

    for alpha in alpha_values:
        krr_model = KernelRidge(kernel='precomputed', alpha=alpha)
        mse_scores = cross_val_score(krr_model, similarity_matrix, y_train, scoring='neg_mean_squared_error', cv=kf)
        avg_rmse = np.sqrt(-mse_scores).mean()  # mean of the per-fold RMSE values

        if avg_rmse < best_mean_score:
            best_mean_score = avg_rmse
            # Fresh dict per candidate; key order (kernel params, then alpha) matches param_grid
            best_param = {**kernel_params, 'alpha': alpha}

print(best_param)
print(best_mean_score)

## Coefficient Squared Eig ##

### Normal Gaussian ###

In [5]:
# First pass on X_lexi_inv_square_eig: 50 x 50 linear grid over alpha and gamma for an RBF kernel ridge model.
param_grid = {
    'alpha': np.linspace(1e-8, 1e-3, num=50),  # ridge (L2) penalty
    'gamma': np.linspace(1e-8, 1e-3, num=50),  # RBF kernel coefficient
    'kernel': ['rbf'],
}

gaussian_KRR = KernelRidge()
k_fold = KFold(n_splits=5, shuffle=True, random_state=42)

# Exhaustive 5-fold search; every warning raised while fitting is silenced inside this block.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search = GridSearchCV(
        estimator=gaussian_KRR,
        param_grid=param_grid,
        scoring='neg_mean_squared_error',
        cv=k_fold,
    ).fit(X_lexi_inv_square_eig, y_delta_energy)

best_params = grid_search.best_params_
# best_score_ is the negated MSE averaged over the folds, so this is sqrt(mean MSE).
best_score = np.sqrt(-grid_search.best_score_)

# Report the winning setting and its score
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

Best Hyperparameters: {'alpha': 1e-08, 'gamma': 1e-08, 'kernel': 'rbf'}
Best Root Mean Squared Error: 0.024065022940014474


In [9]:
# Log-spaced pass on X_lexi_inv_square_eig: alpha and gamma each take 50 values in [1e-13, 1e-8].
param_grid = {
    'alpha': np.logspace(np.log10(1e-13), np.log10(1e-8), num=50),  # ridge (L2) penalty
    'gamma': np.logspace(np.log10(1e-13), np.log10(1e-8), num=50),  # RBF kernel coefficient
    'kernel': ['rbf'],
}

gaussian_KRR = KernelRidge()
k_fold = KFold(n_splits=5, shuffle=True, random_state=42)

# Exhaustive 5-fold search; every warning raised while fitting is silenced inside this block.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search = GridSearchCV(
        estimator=gaussian_KRR,
        param_grid=param_grid,
        scoring='neg_mean_squared_error',
        cv=k_fold,
    ).fit(X_lexi_inv_square_eig, y_delta_energy)

best_params = grid_search.best_params_
# best_score_ is the negated MSE averaged over the folds, so this is sqrt(mean MSE).
best_score = np.sqrt(-grid_search.best_score_)

# Report the winning setting and its score
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

Best Hyperparameters: {'alpha': 1e-13, 'gamma': 1.7575106248547965e-11, 'kernel': 'rbf'}
Best Root Mean Squared Error: 0.023539081477048356


In [10]:
# Log-spaced pass on X_lexi_inv_square_eig: alpha in [1e-18, 1e-13], gamma in [1e-12, 1e-10], 50 values each.
param_grid = {
    'alpha': np.logspace(np.log10(1e-18), np.log10(1e-13), num=50),  # ridge (L2) penalty
    'gamma': np.logspace(np.log10(1e-12), np.log10(1e-10), num=50),  # RBF kernel coefficient
    'kernel': ['rbf'],
}

gaussian_KRR = KernelRidge()
k_fold = KFold(n_splits=5, shuffle=True, random_state=42)

# Exhaustive 5-fold search; every warning raised while fitting is silenced inside this block.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search = GridSearchCV(
        estimator=gaussian_KRR,
        param_grid=param_grid,
        scoring='neg_mean_squared_error',
        cv=k_fold,
    ).fit(X_lexi_inv_square_eig, y_delta_energy)

best_params = grid_search.best_params_
# best_score_ is the negated MSE averaged over the folds, so this is sqrt(mean MSE).
best_score = np.sqrt(-grid_search.best_score_)

# Report the winning setting and its score
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

Best Hyperparameters: {'alpha': 3.5564803062231213e-16, 'gamma': 3.3932217718953296e-12, 'kernel': 'rbf'}
Best Root Mean Squared Error: 0.02167388059908677


In [11]:
# Linear refinement on X_lexi_inv_square_eig: alpha in [1e-16, 1e-15], gamma in [1e-12, 1e-11], 50 values each.
param_grid = {
    'alpha': np.linspace(1e-16, 1e-15, num=50),  # ridge (L2) penalty
    'gamma': np.linspace(1e-12, 1e-11, num=50),  # RBF kernel coefficient
    'kernel': ['rbf'],
}

gaussian_KRR = KernelRidge()
k_fold = KFold(n_splits=5, shuffle=True, random_state=42)

# Exhaustive 5-fold search; every warning raised while fitting is silenced inside this block.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search = GridSearchCV(
        estimator=gaussian_KRR,
        param_grid=param_grid,
        scoring='neg_mean_squared_error',
        cv=k_fold,
    ).fit(X_lexi_inv_square_eig, y_delta_energy)

best_params = grid_search.best_params_
# best_score_ is the negated MSE averaged over the folds, so this is sqrt(mean MSE).
best_score = np.sqrt(-grid_search.best_score_)

# Report the winning setting and its score
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

Best Hyperparameters: {'alpha': 3.387755102040816e-16, 'gamma': 1.7346938775510203e-12, 'kernel': 'rbf'}
Best Root Mean Squared Error: 0.02080583000859528


In [12]:
# Linear refinement on X_lexi_inv_square_eig: alpha in [3e-16, 4e-16], gamma in [1e-12, 5e-12], 50 values each.
param_grid = {
    'alpha': np.linspace(3e-16, 4e-16, num=50),  # ridge (L2) penalty
    'gamma': np.linspace(1e-12, 5e-12, num=50),  # RBF kernel coefficient
    'kernel': ['rbf'],
}

gaussian_KRR = KernelRidge()
k_fold = KFold(n_splits=5, shuffle=True, random_state=42)

# Exhaustive 5-fold search; every warning raised while fitting is silenced inside this block.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search = GridSearchCV(
        estimator=gaussian_KRR,
        param_grid=param_grid,
        scoring='neg_mean_squared_error',
        cv=k_fold,
    ).fit(X_lexi_inv_square_eig, y_delta_energy)

best_params = grid_search.best_params_
# best_score_ is the negated MSE averaged over the folds, so this is sqrt(mean MSE).
best_score = np.sqrt(-grid_search.best_score_)

# Report the winning setting and its score
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

Best Hyperparameters: {'alpha': 3.346938775510204e-16, 'gamma': 2.7142857142857142e-12, 'kernel': 'rbf'}
Best Root Mean Squared Error: 0.020633122042542803


In [13]:
# Linear refinement on X_lexi_inv_square_eig: alpha in [3e-16, 4e-16], gamma narrowed to [2e-12, 3e-12], 50 values each.
param_grid = {
    'alpha': np.linspace(3e-16, 4e-16, num=50),  # ridge (L2) penalty
    'gamma': np.linspace(2e-12, 3e-12, num=50),  # RBF kernel coefficient
    'kernel': ['rbf'],
}

gaussian_KRR = KernelRidge()
k_fold = KFold(n_splits=5, shuffle=True, random_state=42)

# Exhaustive 5-fold search; every warning raised while fitting is silenced inside this block.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search = GridSearchCV(
        estimator=gaussian_KRR,
        param_grid=param_grid,
        scoring='neg_mean_squared_error',
        cv=k_fold,
    ).fit(X_lexi_inv_square_eig, y_delta_energy)

best_params = grid_search.best_params_
# best_score_ is the negated MSE averaged over the folds, so this is sqrt(mean MSE).
best_score = np.sqrt(-grid_search.best_score_)

# Report the winning setting and its score
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

Best Hyperparameters: {'alpha': 3.346938775510204e-16, 'gamma': 2.7142857142857142e-12, 'kernel': 'rbf'}
Best Root Mean Squared Error: 0.020633122042542803


### Extended Gaussian ###

In [15]:
# Convert the pandas DataFrame and Series to numpy arrays
X_train = X_lexi_inv_square_eig.to_numpy()
y_train = y_delta_energy.to_numpy()

# Hyperparameter grid: gamma / epsilon / beta are forwarded to extended_gaussian_kernel,
# alpha is the KernelRidge regularization strength.
param_grid = {
    'gamma': [1e-7, 5e-7, 1e-6, 5e-6, 1e-5, 5e-5, 1e-4],
    'epsilon': [1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1, 1],
    'beta': [1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1, 1],
    'alpha': [1e-9, 5e-9, 1e-8, 5e-8, 1e-7, 5e-7, 1e-6]
}

#########################################################################################################

# Exhaustive search. The similarity matrix depends only on the kernel parameters, so it is
# built once per (gamma, epsilon, beta) setting and reused for every alpha instead of being
# rebuilt for each full combination. Alpha still varies fastest and the comparison is a strict
# '<', so ties are resolved exactly as in a flat loop over the full grid.
# NOTE(review): assumes create_similarity_matrix is deterministic for given arguments - confirm.
alpha_values = param_grid['alpha']
kernel_grid = {name: values for name, values in param_grid.items() if name != 'alpha'}

best_mean_score = np.inf
best_params = None

# An integer random_state makes KFold yield the same shuffled folds on every split() call,
# so a single splitter can be shared by all evaluations.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

for kernel_params in generate_parameter_combinations(kernel_grid):
    similarity_matrix = create_similarity_matrix(X_train, X_train, extended_gaussian_kernel, kernel_params)

    for alpha in alpha_values:
        krr_model = KernelRidge(kernel='precomputed', alpha=alpha)
        mse_scores = cross_val_score(krr_model, similarity_matrix, y_train, scoring='neg_mean_squared_error', cv=kf)
        avg_rmse = np.sqrt(-mse_scores).mean()  # mean of the per-fold RMSE values

        if avg_rmse < best_mean_score:
            best_mean_score = avg_rmse
            # Fresh dict per candidate; key order (kernel params, then alpha) matches param_grid
            best_params = {**kernel_params, 'alpha': alpha}

print(f"Best params: {best_params}")
print(f"Best score: {best_mean_score}")

Best params: {'gamma': 1e-07, 'epsilon': 0.001, 'beta': 0.1, 'alpha': 1e-06}
Best score: 1.405021514056013


In [16]:
# Convert the pandas DataFrame and Series to numpy arrays
X_train = X_lexi_inv_square_eig.to_numpy()
y_train = y_delta_energy.to_numpy()

# Hyperparameter grid: gamma / epsilon / beta are forwarded to extended_gaussian_kernel,
# alpha is the KernelRidge regularization strength.
param_grid = {
    'gamma': np.logspace(np.log10(1e-9), np.log10(1e-7), num=7),
    'epsilon': np.logspace(np.log10(1e-5), np.log10(1e-3), num=7),
    'beta': np.logspace(np.log10(1e-5), np.log10(1e-3), num=7),
    'alpha': np.logspace(np.log10(1e-8), np.log10(1e-6), num=7)
}

#########################################################################################################

# Exhaustive search. The similarity matrix depends only on the kernel parameters, so it is
# built once per (gamma, epsilon, beta) setting and reused for every alpha instead of being
# rebuilt for each full combination. Alpha still varies fastest and the comparison is a strict
# '<', so ties are resolved exactly as in a flat loop over the full grid.
# NOTE(review): assumes create_similarity_matrix is deterministic for given arguments - confirm.
alpha_values = param_grid['alpha']
kernel_grid = {name: values for name, values in param_grid.items() if name != 'alpha'}

best_mean_score = np.inf
best_params = None

# An integer random_state makes KFold yield the same shuffled folds on every split() call,
# so a single splitter can be shared by all evaluations.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

for kernel_params in generate_parameter_combinations(kernel_grid):
    similarity_matrix = create_similarity_matrix(X_train, X_train, extended_gaussian_kernel, kernel_params)

    for alpha in alpha_values:
        krr_model = KernelRidge(kernel='precomputed', alpha=alpha)
        mse_scores = cross_val_score(krr_model, similarity_matrix, y_train, scoring='neg_mean_squared_error', cv=kf)
        avg_rmse = np.sqrt(-mse_scores).mean()  # mean of the per-fold RMSE values

        if avg_rmse < best_mean_score:
            best_mean_score = avg_rmse
            # Fresh dict per candidate; key order (kernel params, then alpha) matches param_grid
            best_params = {**kernel_params, 'alpha': alpha}

print(f"Best params: {best_params}")
print(f"Best score: {best_mean_score}")

Best params: {'gamma': 1e-07, 'epsilon': 1e-05, 'beta': 1e-05, 'alpha': 1e-07}
Best score: 0.33782084262840406


In [8]:
# Convert the pandas DataFrame and Series to numpy arrays
X_train = X_lexi_inv_square_eig.to_numpy()
y_train = y_delta_energy.to_numpy()

# Hyperparameter grid: gamma / epsilon / beta are forwarded to extended_gaussian_kernel,
# alpha is the KernelRidge regularization strength.
param_grid = {
    'gamma': np.logspace(np.log10(1e-8), np.log10(1e-6), num=9),
    'epsilon': np.logspace(np.log10(1e-7), np.log10(1e-5), num=9),
    'beta': np.logspace(np.log10(1e-7), np.log10(1e-5), num=9),
    'alpha': np.logspace(np.log10(1e-8), np.log10(1e-6), num=9)
}

#########################################################################################################

# Exhaustive search. The similarity matrix depends only on the kernel parameters, so it is
# built once per (gamma, epsilon, beta) setting and reused for every alpha instead of being
# rebuilt for each full combination. Alpha still varies fastest and the comparison is a strict
# '<', so ties are resolved exactly as in a flat loop over the full grid.
# NOTE(review): assumes create_similarity_matrix is deterministic for given arguments - confirm.
alpha_values = param_grid['alpha']
kernel_grid = {name: values for name, values in param_grid.items() if name != 'alpha'}

best_mean_score = np.inf
best_params = None

# An integer random_state makes KFold yield the same shuffled folds on every split() call,
# so a single splitter can be shared by all evaluations.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

for kernel_params in generate_parameter_combinations(kernel_grid):
    similarity_matrix = create_similarity_matrix(X_train, X_train, extended_gaussian_kernel, kernel_params)

    for alpha in alpha_values:
        krr_model = KernelRidge(kernel='precomputed', alpha=alpha)
        mse_scores = cross_val_score(krr_model, similarity_matrix, y_train, scoring='neg_mean_squared_error', cv=kf)
        avg_rmse = np.sqrt(-mse_scores).mean()  # mean of the per-fold RMSE values

        if avg_rmse < best_mean_score:
            best_mean_score = avg_rmse
            # Fresh dict per candidate; key order (kernel params, then alpha) matches param_grid
            best_params = {**kernel_params, 'alpha': alpha}

print(f"Best params: {best_params}")
print(f"Best score: {best_mean_score}")

Best params: {'gamma': 1.7782794100389227e-07, 'epsilon': 1e-07, 'beta': 1.778279410038923e-06, 'alpha': 3.162277660168379e-07}
Best score: 0.027820528560536635


In [11]:
# Scratch comparison: nine points between 1e-7 and 5e-7, spaced linearly vs. logarithmically.
# (endpoint=True is np.linspace's default, so it is left implicit.)
print(
    np.linspace(1e-7, 5e-7, num=9),
    np.logspace(np.log10(1e-7), np.log10(5e-7), num=9),
    sep="\n",
)

[1.0e-07 1.5e-07 2.0e-07 2.5e-07 3.0e-07 3.5e-07 4.0e-07 4.5e-07 5.0e-07]
[1.00000000e-07 1.22284454e-07 1.49534878e-07 1.82857910e-07
 2.23606798e-07 2.73436353e-07 3.34370152e-07 4.08882717e-07
 5.00000000e-07]


In [12]:
# Convert the pandas DataFrame and Series to numpy arrays
X_train = X_lexi_inv_square_eig.to_numpy()
y_train = y_delta_energy.to_numpy()

# Hyperparameter grid: gamma / epsilon / beta are forwarded to extended_gaussian_kernel,
# alpha is the KernelRidge regularization strength.
param_grid = {
    'gamma': np.logspace(np.log10(1e-7), np.log10(5e-7), num=9),
    'epsilon': np.logspace(np.log10(1e-9), np.log10(1e-7), num=9),
    'beta': np.logspace(np.log10(1e-6), np.log10(5e-6), num=9),
    'alpha': np.logspace(np.log10(1e-7), np.log10(1e-6), num=9)
}

#########################################################################################################

# Exhaustive search. The similarity matrix depends only on the kernel parameters, so it is
# built once per (gamma, epsilon, beta) setting and reused for every alpha instead of being
# rebuilt for each full combination. Alpha still varies fastest and the comparison is a strict
# '<', so ties are resolved exactly as in a flat loop over the full grid.
# NOTE(review): assumes create_similarity_matrix is deterministic for given arguments - confirm.
alpha_values = param_grid['alpha']
kernel_grid = {name: values for name, values in param_grid.items() if name != 'alpha'}

best_mean_score = np.inf
best_params = None

# An integer random_state makes KFold yield the same shuffled folds on every split() call,
# so a single splitter can be shared by all evaluations.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

for kernel_params in generate_parameter_combinations(kernel_grid):
    similarity_matrix = create_similarity_matrix(X_train, X_train, extended_gaussian_kernel, kernel_params)

    for alpha in alpha_values:
        krr_model = KernelRidge(kernel='precomputed', alpha=alpha)
        mse_scores = cross_val_score(krr_model, similarity_matrix, y_train, scoring='neg_mean_squared_error', cv=kf)
        avg_rmse = np.sqrt(-mse_scores).mean()  # mean of the per-fold RMSE values

        if avg_rmse < best_mean_score:
            best_mean_score = avg_rmse
            # Fresh dict per candidate; key order (kernel params, then alpha) matches param_grid
            best_params = {**kernel_params, 'alpha': alpha}

print(f"Best params: {best_params}")
print(f"Best score: {best_mean_score}")

Best params: {'gamma': 1e-07, 'epsilon': 1e-09, 'beta': 2.2360679774997895e-06, 'alpha': 1e-07}
Best score: 0.023318692621455295


In [13]:
# Convert the pandas DataFrame and Series to numpy arrays
X_train = X_lexi_inv_square_eig.to_numpy()
y_train = y_delta_energy.to_numpy()

# Hyperparameter grid: gamma / epsilon / beta are forwarded to extended_gaussian_kernel,
# alpha is the KernelRidge regularization strength.
param_grid = {
    'gamma': np.logspace(np.log10(5e-8), np.log10(1e-7), num=9),
    'epsilon': np.logspace(np.log10(1e-11), np.log10(1e-9), num=9),
    'beta': np.linspace(2e-6, 3e-6, num=9, endpoint=True),
    'alpha': np.logspace(np.log10(1e-8), np.log10(1e-7), num=9)
}

#########################################################################################################

# Exhaustive search. The similarity matrix depends only on the kernel parameters, so it is
# built once per (gamma, epsilon, beta) setting and reused for every alpha instead of being
# rebuilt for each full combination. Alpha still varies fastest and the comparison is a strict
# '<', so ties are resolved exactly as in a flat loop over the full grid.
# NOTE(review): assumes create_similarity_matrix is deterministic for given arguments - confirm.
alpha_values = param_grid['alpha']
kernel_grid = {name: values for name, values in param_grid.items() if name != 'alpha'}

best_mean_score = np.inf
best_params = None

# An integer random_state makes KFold yield the same shuffled folds on every split() call,
# so a single splitter can be shared by all evaluations.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

for kernel_params in generate_parameter_combinations(kernel_grid):
    similarity_matrix = create_similarity_matrix(X_train, X_train, extended_gaussian_kernel, kernel_params)

    for alpha in alpha_values:
        krr_model = KernelRidge(kernel='precomputed', alpha=alpha)
        mse_scores = cross_val_score(krr_model, similarity_matrix, y_train, scoring='neg_mean_squared_error', cv=kf)
        avg_rmse = np.sqrt(-mse_scores).mean()  # mean of the per-fold RMSE values

        if avg_rmse < best_mean_score:
            best_mean_score = avg_rmse
            # Fresh dict per candidate; key order (kernel params, then alpha) matches param_grid
            best_params = {**kernel_params, 'alpha': alpha}

print(f"Best params: {best_params}")
print(f"Best score: {best_mean_score}")

Best params: {'gamma': 4.999999999999999e-08, 'epsilon': 1e-11, 'beta': 2e-06, 'alpha': 7.498942093324559e-08}
Best score: 0.022068364657641663


In [14]:
# Convert the pandas DataFrame and Series to numpy arrays
X_train = X_lexi_inv_square_eig.to_numpy()
y_train = y_delta_energy.to_numpy()

# Hyperparameter grid: gamma / epsilon / beta are forwarded to extended_gaussian_kernel,
# alpha is the KernelRidge regularization strength.
param_grid = {
    'gamma': np.logspace(np.log10(1e-8), np.log10(5e-8), num=9),
    'epsilon': np.logspace(np.log10(1e-13), np.log10(1e-11), num=9),
    'beta': np.linspace(1e-6, 2e-6, num=9, endpoint=True),
    'alpha': np.logspace(np.log10(5e-8), np.log10(1e-7), num=9)
}

#########################################################################################################

# Exhaustive search. The similarity matrix depends only on the kernel parameters, so it is
# built once per (gamma, epsilon, beta) setting and reused for every alpha instead of being
# rebuilt for each full combination. Alpha still varies fastest and the comparison is a strict
# '<', so ties are resolved exactly as in a flat loop over the full grid.
# NOTE(review): assumes create_similarity_matrix is deterministic for given arguments - confirm.
alpha_values = param_grid['alpha']
kernel_grid = {name: values for name, values in param_grid.items() if name != 'alpha'}

best_mean_score = np.inf
best_params = None

# An integer random_state makes KFold yield the same shuffled folds on every split() call,
# so a single splitter can be shared by all evaluations.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

for kernel_params in generate_parameter_combinations(kernel_grid):
    similarity_matrix = create_similarity_matrix(X_train, X_train, extended_gaussian_kernel, kernel_params)

    for alpha in alpha_values:
        krr_model = KernelRidge(kernel='precomputed', alpha=alpha)
        mse_scores = cross_val_score(krr_model, similarity_matrix, y_train, scoring='neg_mean_squared_error', cv=kf)
        avg_rmse = np.sqrt(-mse_scores).mean()  # mean of the per-fold RMSE values

        if avg_rmse < best_mean_score:
            best_mean_score = avg_rmse
            # Fresh dict per candidate; key order (kernel params, then alpha) matches param_grid
            best_params = {**kernel_params, 'alpha': alpha}

print(f"Best params: {best_params}")
print(f"Best score: {best_mean_score}")

Best params: {'gamma': 2.7343635285210528e-08, 'epsilon': 1e-13, 'beta': 1.125e-06, 'alpha': 4.999999999999999e-08}
Best score: 0.021865089243977343


In [17]:
# Convert the pandas DataFrame and Series to numpy arrays
X_train = X_lexi_inv_square_eig.to_numpy()
y_train = y_delta_energy.to_numpy()

# Hyperparameter grid: gamma / epsilon / beta are forwarded to extended_gaussian_kernel,
# alpha is the KernelRidge regularization strength. epsilon includes an explicit 0 here.
param_grid = {
    'gamma': np.linspace(2.5e-8, 3.5e-8, num=9, endpoint=True),
    'epsilon': [0, 1e-19, 1e-17, 1e-15, 1e-13],
    'beta': np.linspace(1e-6, 1.5e-6, num=9, endpoint=True),
    'alpha': np.logspace(np.log10(1e-8), np.log10(5e-8), num=9)
}

#########################################################################################################

# Exhaustive search. The similarity matrix depends only on the kernel parameters, so it is
# built once per (gamma, epsilon, beta) setting and reused for every alpha instead of being
# rebuilt for each full combination. Alpha still varies fastest and the comparison is a strict
# '<', so ties are resolved exactly as in a flat loop over the full grid.
# NOTE(review): assumes create_similarity_matrix is deterministic for given arguments - confirm.
alpha_values = param_grid['alpha']
kernel_grid = {name: values for name, values in param_grid.items() if name != 'alpha'}

best_mean_score = np.inf
best_params = None

# An integer random_state makes KFold yield the same shuffled folds on every split() call,
# so a single splitter can be shared by all evaluations.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

for kernel_params in generate_parameter_combinations(kernel_grid):
    similarity_matrix = create_similarity_matrix(X_train, X_train, extended_gaussian_kernel, kernel_params)

    for alpha in alpha_values:
        krr_model = KernelRidge(kernel='precomputed', alpha=alpha)
        mse_scores = cross_val_score(krr_model, similarity_matrix, y_train, scoring='neg_mean_squared_error', cv=kf)
        avg_rmse = np.sqrt(-mse_scores).mean()  # mean of the per-fold RMSE values

        if avg_rmse < best_mean_score:
            best_mean_score = avg_rmse
            # Fresh dict per candidate; key order (kernel params, then alpha) matches param_grid
            best_params = {**kernel_params, 'alpha': alpha}

print(f"Best params: {best_params}")
print(f"Best score: {best_mean_score}")

Best params: {'gamma': 2.5e-08, 'epsilon': 1e-13, 'beta': 1e-06, 'alpha': 4.088827169789712e-08}
Best score: 0.02183295823155789


In [18]:
# Convert the pandas DataFrame and Series to numpy arrays
X_train = X_lexi_inv_square_eig.to_numpy()
y_train = y_delta_energy.to_numpy()

# Hyperparameter grid: gamma / epsilon / beta are forwarded to extended_gaussian_kernel,
# alpha is the KernelRidge regularization strength.
param_grid = {
    'gamma': np.linspace(2e-8, 3e-8, num=9, endpoint=True),
    'epsilon': np.logspace(np.log10(1e-14), np.log10(1e-13), num=9),
    'beta': np.linspace(1e-6, 1.5e-6, num=9, endpoint=True),
    'alpha': np.linspace(3e-8, 5e-8, num=9, endpoint=True)
}

#########################################################################################################

# Exhaustive search. The similarity matrix depends only on the kernel parameters, so it is
# built once per (gamma, epsilon, beta) setting and reused for every alpha instead of being
# rebuilt for each full combination. Alpha still varies fastest and the comparison is a strict
# '<', so ties are resolved exactly as in a flat loop over the full grid.
# NOTE(review): assumes create_similarity_matrix is deterministic for given arguments - confirm.
alpha_values = param_grid['alpha']
kernel_grid = {name: values for name, values in param_grid.items() if name != 'alpha'}

best_mean_score = np.inf
best_params = None

# An integer random_state makes KFold yield the same shuffled folds on every split() call,
# so a single splitter can be shared by all evaluations.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

for kernel_params in generate_parameter_combinations(kernel_grid):
    similarity_matrix = create_similarity_matrix(X_train, X_train, extended_gaussian_kernel, kernel_params)

    for alpha in alpha_values:
        krr_model = KernelRidge(kernel='precomputed', alpha=alpha)
        mse_scores = cross_val_score(krr_model, similarity_matrix, y_train, scoring='neg_mean_squared_error', cv=kf)
        avg_rmse = np.sqrt(-mse_scores).mean()  # mean of the per-fold RMSE values

        if avg_rmse < best_mean_score:
            best_mean_score = avg_rmse
            # Fresh dict per candidate; key order (kernel params, then alpha) matches param_grid
            best_params = {**kernel_params, 'alpha': alpha}

print(f"Best params: {best_params}")
print(f"Best score: {best_mean_score}")

Best params: {'gamma': 2.3749999999999998e-08, 'epsilon': 1e-13, 'beta': 1e-06, 'alpha': 4e-08}
Best score: 0.021824137110720777


In [19]:
# Convert the pandas DataFrame and Series to numpy arrays
X_train = X_lexi_inv_square_eig.to_numpy()
y_train = y_delta_energy.to_numpy()

# Hyperparameter grid: gamma / epsilon / beta are forwarded to extended_gaussian_kernel,
# alpha is the KernelRidge regularization strength.
param_grid = {
    'gamma': np.linspace(2.3e-8, 2.5e-8, num=9, endpoint=True),
    'epsilon': np.logspace(np.log10(5e-14), np.log10(5e-13), num=9),
    'beta': np.linspace(9e-7, 1e-6, num=9, endpoint=True),
    'alpha': np.linspace(3.8e-8, 4.2e-8, num=9, endpoint=True)
}

#########################################################################################################

# Exhaustive search. The similarity matrix depends only on the kernel parameters, so it is
# built once per (gamma, epsilon, beta) setting and reused for every alpha instead of being
# rebuilt for each full combination. Alpha still varies fastest and the comparison is a strict
# '<', so ties are resolved exactly as in a flat loop over the full grid.
# NOTE(review): assumes create_similarity_matrix is deterministic for given arguments - confirm.
alpha_values = param_grid['alpha']
kernel_grid = {name: values for name, values in param_grid.items() if name != 'alpha'}

best_mean_score = np.inf
best_params = None

# An integer random_state makes KFold yield the same shuffled folds on every split() call,
# so a single splitter can be shared by all evaluations.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

for kernel_params in generate_parameter_combinations(kernel_grid):
    similarity_matrix = create_similarity_matrix(X_train, X_train, extended_gaussian_kernel, kernel_params)

    for alpha in alpha_values:
        krr_model = KernelRidge(kernel='precomputed', alpha=alpha)
        mse_scores = cross_val_score(krr_model, similarity_matrix, y_train, scoring='neg_mean_squared_error', cv=kf)
        avg_rmse = np.sqrt(-mse_scores).mean()  # mean of the per-fold RMSE values

        if avg_rmse < best_mean_score:
            best_mean_score = avg_rmse
            # Fresh dict per candidate; key order (kernel params, then alpha) matches param_grid
            best_params = {**kernel_params, 'alpha': alpha}

print(f"Best params: {best_params}")
print(f"Best score: {best_mean_score}")

Best params: {'gamma': 2.3e-08, 'epsilon': 4.999999999999999e-13, 'beta': 9.249999999999999e-07, 'alpha': 3.85e-08}
Best score: 0.021812319989313496
