In [1]:
from sklearn.linear_model import Ridge
from sklearn.kernel_ridge import KernelRidge
from sklearn.model_selection import cross_val_score, KFold, GridSearchCV, train_test_split
from sklearn.metrics import mean_squared_error
import warnings
import numpy as np
import pandas as pd
from IPython.display import display

import sys
sys.path.append('../Data')

## Load Data ##

In [15]:
# Load Data
#
# Every CSV is read with pandas' default options (no index_col / header overrides).
# NOTE(review): if the files were written with an index column it would be loaded
# as an ordinary column here - verify against how the CSVs were produced.

# X_* tables (presumably alternative feature representations - confirm).
X_lexi, X_lexi_nd, X_sorted, X_concat = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../Data/[Benz] X_lexi.csv',
        '../Data/[Benz] X_lexi_nd.csv',
        '../Data/[Benz] X_sorted.csv',
        '../Data/[Benz] X_concat.csv',
    )
)

# y_* tables (presumably the regression targets - confirm).
y_energy, y_elec, y_delta_elec, y_delta_energy = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../Data/[Benz] y_energy.csv",
        "../Data/[Benz] y_elec.csv",
        "../Data/[Benz] y_delta_elec.csv",
        "../Data/[Benz] y_delta_energy.csv",
    )
)

## Polynomial Kernel ##

### Poly_lexi_delta_tot ###

In [13]:
# Small alpha, small coef0
#
# Polynomial-kernel ridge on X_lexi -> y_delta_energy.
# Log-spaced grids: alpha in [1e-7, 1e-3], coef0 in [1e-5, 1e-1], 40 points each,
# crossed with polynomial degrees 2, 3 and 4.
poly_KRR = KernelRidge()
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

param_grid = dict(
    alpha=np.logspace(np.log10(1e-7), np.log10(1e-3), num=40),
    kernel=['poly'],
    degree=[2, 3, 4],
    coef0=np.logspace(np.log10(1e-5), np.log10(1e-1), num=40),
)

grid_search = GridSearchCV(
    estimator=poly_KRR,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=k_fold,
)

# Every warning raised while fitting is silenced inside this context only.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is a negated MSE: flip the sign, then take the root to get an RMSE.
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

Best Hyperparameters: {'alpha': 1e-07, 'coef0': 1e-05, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 0.4442300235346188


In [14]:
# Small alpha, large coef0
#
# Polynomial-kernel ridge on X_lexi -> y_delta_energy.
# Log-spaced grids: alpha in [1e-7, 1e-3], coef0 in [1e-1, 1e3], 40 points each,
# crossed with polynomial degrees 2, 3 and 4.
poly_KRR = KernelRidge()
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

param_grid = dict(
    alpha=np.logspace(np.log10(1e-7), np.log10(1e-3), num=40),
    kernel=['poly'],
    degree=[2, 3, 4],
    coef0=np.logspace(np.log10(1e-1), np.log10(1e3), num=40),
)

grid_search = GridSearchCV(
    estimator=poly_KRR,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=k_fold,
)

# Every warning raised while fitting is silenced inside this context only.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is a negated MSE: flip the sign, then take the root to get an RMSE.
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

Best Hyperparameters: {'alpha': 1.2663801734674022e-07, 'coef0': 0.1, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 0.4453004441426715


In [15]:
# Large alpha, small coef0
#
# Polynomial-kernel ridge on X_lexi -> y_delta_energy.
# Log-spaced grids: alpha in [1e-3, 1e1], coef0 in [1e-5, 1e-1], 40 points each,
# crossed with polynomial degrees 2, 3 and 4.
poly_KRR = KernelRidge()
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

param_grid = dict(
    alpha=np.logspace(np.log10(1e-3), np.log10(1e1), num=40),
    kernel=['poly'],
    degree=[2, 3, 4],
    coef0=np.logspace(np.log10(1e-5), np.log10(1e-1), num=40),
)

grid_search = GridSearchCV(
    estimator=poly_KRR,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=k_fold,
)

# Every warning raised while fitting is silenced inside this context only.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is a negated MSE: flip the sign, then take the root to get an RMSE.
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

Best Hyperparameters: {'alpha': 0.001, 'coef0': 1e-05, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 0.44423376250340063


In [16]:
# Large alpha, large coef0
#
# Polynomial-kernel ridge on X_lexi -> y_delta_energy.
# Log-spaced grids: alpha in [1e-3, 1e1], coef0 in [1e-1, 1e3], 40 points each,
# crossed with polynomial degrees 2, 3 and 4.
poly_KRR = KernelRidge()
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

param_grid = dict(
    alpha=np.logspace(np.log10(1e-3), np.log10(1e1), num=40),
    kernel=['poly'],
    degree=[2, 3, 4],
    coef0=np.logspace(np.log10(1e-1), np.log10(1e3), num=40),
)

grid_search = GridSearchCV(
    estimator=poly_KRR,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=k_fold,
)

# Every warning raised while fitting is silenced inside this context only.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is a negated MSE: flip the sign, then take the root to get an RMSE.
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

Best Hyperparameters: {'alpha': 0.001, 'coef0': 0.1, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 0.4453041728880147


In [17]:
# Degree-2 polynomial-kernel ridge on X_lexi -> y_delta_energy.
# Linearly spaced grids: alpha in [1e-8, 1e-6], coef0 in [1e-6, 1e-4], 40 points each.
poly_KRR = KernelRidge()
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

param_grid = dict(
    alpha=np.linspace(1e-8, 1e-6, num=40),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(1e-6, 1e-4, num=40),
)

grid_search = GridSearchCV(
    estimator=poly_KRR,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=k_fold,
)

# Every warning raised while fitting is silenced inside this context only.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is a negated MSE: flip the sign, then take the root to get an RMSE.
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

Best Hyperparameters: {'alpha': 1e-08, 'coef0': 1e-06, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 0.4442297841232004


In [18]:
# Degree-2 polynomial-kernel ridge on X_lexi -> y_delta_energy.
# Linearly spaced grids: alpha in [1e-10, 1e-8], coef0 in [1e-8, 1e-6], 40 points each.
poly_KRR = KernelRidge()
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

param_grid = dict(
    alpha=np.linspace(1e-10, 1e-8, num=40),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(1e-8, 1e-6, num=40),
)

grid_search = GridSearchCV(
    estimator=poly_KRR,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=k_fold,
)

# Every warning raised while fitting is silenced inside this context only.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is a negated MSE: flip the sign, then take the root to get an RMSE.
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

Best Hyperparameters: {'alpha': 1e-10, 'coef0': 1.1153846153846154e-07, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 0.4442033972799054


In [19]:
# Degree-2 polynomial-kernel ridge on X_lexi -> y_delta_energy.
# Linearly spaced grids: alpha in [1e-10, 1e-8], coef0 in [1e-6, 1e-4], 40 points each.
poly_KRR = KernelRidge()
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

param_grid = dict(
    alpha=np.linspace(1e-10, 1e-8, num=40),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(1e-6, 1e-4, num=40),
)

grid_search = GridSearchCV(
    estimator=poly_KRR,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=k_fold,
)

# Every warning raised while fitting is silenced inside this context only.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is a negated MSE: flip the sign, then take the root to get an RMSE.
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

Best Hyperparameters: {'alpha': 1e-10, 'coef0': 9.238461538461538e-05, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 0.4442023197670601


In [20]:
# Degree-2 polynomial-kernel ridge on X_lexi -> y_delta_energy.
# Linearly spaced grids: alpha in [1e-8, 1e-6], coef0 in [1e-8, 1e-6], 40 points each.
poly_KRR = KernelRidge()
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

param_grid = dict(
    alpha=np.linspace(1e-8, 1e-6, num=40),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(1e-8, 1e-6, num=40),
)

grid_search = GridSearchCV(
    estimator=poly_KRR,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=k_fold,
)

# Every warning raised while fitting is silenced inside this context only.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is a negated MSE: flip the sign, then take the root to get an RMSE.
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

Best Hyperparameters: {'alpha': 1e-08, 'coef0': 1.623076923076923e-07, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 0.4442296972350603


In [21]:
# Degree-2 polynomial-kernel ridge on X_lexi -> y_delta_energy.
# Linearly spaced grids: alpha in [1e-12, 1e-10] (40 points),
# coef0 in [1e-8, 1e-4] (80 points).
poly_KRR = KernelRidge()
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

param_grid = dict(
    alpha=np.linspace(1e-12, 1e-10, num=40),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(1e-8, 1e-4, num=80),
)

grid_search = GridSearchCV(
    estimator=poly_KRR,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=k_fold,
)

# Every warning raised while fitting is silenced inside this context only.
# NOTE(review): this would also hide any ill-conditioning warnings the solver
# might raise for such small alpha - confirm that is intended.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is a negated MSE: flip the sign, then take the root to get an RMSE.
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

Best Hyperparameters: {'alpha': 1e-12, 'coef0': 0.0001, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 0.43066847166021627


In [22]:
# Degree-2 polynomial-kernel ridge on X_lexi -> y_delta_energy.
# Linearly spaced grids: alpha in [1e-14, 1e-12] (80 points),
# coef0 in [1e-8, 1e-2] (120 points).
poly_KRR = KernelRidge()
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

param_grid = dict(
    alpha=np.linspace(1e-14, 1e-12, num=80),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(1e-8, 1e-2, num=120),
)

grid_search = GridSearchCV(
    estimator=poly_KRR,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=k_fold,
)

# Every warning raised while fitting is silenced inside this context only.
# NOTE(review): this would also hide any ill-conditioning warnings the solver
# might raise for such small alpha - confirm that is intended.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is a negated MSE: flip the sign, then take the root to get an RMSE.
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

Best Hyperparameters: {'alpha': 3.4835443037974683e-13, 'coef0': 0.0024369823529411766, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 0.4254779738326166


In [25]:
# Degree-2 polynomial-kernel ridge on X_lexi -> y_delta_energy.
# Linearly spaced grids: alpha in [1e-13, 1e-12], coef0 in [0.001, 0.01], 50 points each.
poly_KRR = KernelRidge()
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

param_grid = dict(
    alpha=np.linspace(1e-13, 1e-12, num=50),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(0.001, 0.01, num=50),
)

grid_search = GridSearchCV(
    estimator=poly_KRR,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=k_fold,
)

# Every warning raised while fitting is silenced inside this context only.
# NOTE(review): this would also hide any ill-conditioning warnings the solver
# might raise for such small alpha - confirm that is intended.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is a negated MSE: flip the sign, then take the root to get an RMSE.
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

Best Hyperparameters: {'alpha': 1.1836734693877552e-13, 'coef0': 0.007061224489795919, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 0.42661348517878667


In [26]:
# Degree-2 polynomial-kernel ridge on X_lexi -> y_delta_energy.
# Linearly spaced grids: alpha in [2e-13, 5e-13] (50 points),
# coef0 in [0.001, 0.01] (100 points).
poly_KRR = KernelRidge()
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

param_grid = dict(
    alpha=np.linspace(2e-13, 5e-13, num=50),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(0.001, 0.01, num=100),
)

grid_search = GridSearchCV(
    estimator=poly_KRR,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=k_fold,
)

# Every warning raised while fitting is silenced inside this context only.
# NOTE(review): this would also hide any ill-conditioning warnings the solver
# might raise for such small alpha - confirm that is intended.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is a negated MSE: flip the sign, then take the root to get an RMSE.
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

Best Hyperparameters: {'alpha': 2e-13, 'coef0': 0.005636363636363636, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 0.4257777435534395


In [29]:
# Degree-2 polynomial-kernel ridge on X_lexi -> y_delta_energy.
# Linearly spaced grids: alpha in [3e-13, 4e-13], coef0 in [0.002, 0.003], 50 points each.
poly_KRR = KernelRidge()
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

param_grid = dict(
    alpha=np.linspace(3e-13, 4e-13, num=50),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(0.002, 0.003, num=50),
)

grid_search = GridSearchCV(
    estimator=poly_KRR,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=k_fold,
)

# Every warning raised while fitting is silenced inside this context only.
# NOTE(review): this would also hide any ill-conditioning warnings the solver
# might raise for such small alpha - confirm that is intended.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is a negated MSE: flip the sign, then take the root to get an RMSE.
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

Best Hyperparameters: {'alpha': 3.428571428571428e-13, 'coef0': 0.002285714285714286, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 0.42964715235000245


### Poly_lexi_nd_delta_tot ###

In [19]:
# Polynomial-kernel ridge on X_lexi_nd -> y_delta_energy.
# Log-spaced grids: alpha in [1e-7, 1e-3], coef0 in [1e-5, 1e-1], 50 points each,
# crossed with polynomial degrees 2, 3 and 4.
poly_KRR = KernelRidge()
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

param_grid = dict(
    alpha=np.logspace(np.log10(1e-7), np.log10(1e-3), num=50),
    kernel=['poly'],
    degree=[2, 3, 4],
    coef0=np.logspace(np.log10(1e-5), np.log10(1e-1), num=50),
)

grid_search = GridSearchCV(
    estimator=poly_KRR,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=k_fold,
)

# Every warning raised while fitting is silenced inside this context only.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search.fit(X_lexi_nd, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is a negated MSE: flip the sign, then take the root to get an RMSE.
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

Best Hyperparameters: {'alpha': 1e-07, 'coef0': 1e-05, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 0.4319134769216178


In [20]:
# Degree-2 polynomial-kernel ridge on X_lexi_nd -> y_delta_energy.
# Linearly spaced grids (endpoints included): alpha in [1e-9, 1e-7],
# coef0 in [1e-7, 1e-5], 51 points each.
poly_KRR = KernelRidge()
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

param_grid = dict(
    alpha=np.linspace(1e-9, 1e-7, num=51, endpoint=True),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(1e-7, 1e-5, num=51, endpoint=True),
)

grid_search = GridSearchCV(
    estimator=poly_KRR,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=k_fold,
)

# Every warning raised while fitting is silenced inside this context only.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search.fit(X_lexi_nd, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is a negated MSE: flip the sign, then take the root to get an RMSE.
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

Best Hyperparameters: {'alpha': 1e-09, 'coef0': 1.2879999999999998e-06, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 0.4319116724429447


In [21]:
# Degree-2 polynomial-kernel ridge on X_lexi_nd -> y_delta_energy.
# Linearly spaced grids (endpoints included): alpha in [1e-11, 1e-9],
# coef0 in [1e-6, 2e-6], 51 points each.
poly_KRR = KernelRidge()
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

param_grid = dict(
    alpha=np.linspace(1e-11, 1e-9, num=51, endpoint=True),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(1e-6, 2e-6, num=51, endpoint=True),
)

grid_search = GridSearchCV(
    estimator=poly_KRR,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=k_fold,
)

# Every warning raised while fitting is silenced inside this context only.
# NOTE(review): this would also hide any ill-conditioning warnings the solver
# might raise for such small alpha - confirm that is intended.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search.fit(X_lexi_nd, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is a negated MSE: flip the sign, then take the root to get an RMSE.
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

Best Hyperparameters: {'alpha': 1e-11, 'coef0': 1.02e-06, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 0.43177823161436146


In [22]:
# Degree-2 polynomial-kernel ridge on X_lexi_nd -> y_delta_energy.
# Linearly spaced grids (endpoints included): alpha in [1e-13, 1e-11],
# coef0 in [1e-7, 1e-5], 51 points each.
poly_KRR = KernelRidge()
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

param_grid = dict(
    alpha=np.linspace(1e-13, 1e-11, num=51, endpoint=True),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(1e-7, 1e-5, num=51, endpoint=True),
)

grid_search = GridSearchCV(
    estimator=poly_KRR,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=k_fold,
)

# Every warning raised while fitting is silenced inside this context only.
# NOTE(review): this would also hide any ill-conditioning warnings the solver
# might raise for such small alpha - confirm that is intended.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search.fit(X_lexi_nd, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is a negated MSE: flip the sign, then take the root to get an RMSE.
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

Best Hyperparameters: {'alpha': 1e-13, 'coef0': 6.04e-06, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 0.42511662604448713


In [23]:
# Degree-2 polynomial-kernel ridge on X_lexi_nd -> y_delta_energy.
# Linearly spaced grids (endpoints included): alpha in [1e-15, 1e-13],
# coef0 in [1e-7, 1e-5], 51 points each.
# NOTE(review): alpha this small may be at or below float64 resolution relative
# to the kernel-matrix entries, in which case it has no numerical effect - confirm.
poly_KRR = KernelRidge()
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

param_grid = dict(
    alpha=np.linspace(1e-15, 1e-13, num=51, endpoint=True),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(1e-7, 1e-5, num=51, endpoint=True),
)

grid_search = GridSearchCV(
    estimator=poly_KRR,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=k_fold,
)

# Every warning raised while fitting is silenced inside this context only;
# that includes any solver conditioning warnings, should there be any.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search.fit(X_lexi_nd, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is a negated MSE: flip the sign, then take the root to get an RMSE.
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

Best Hyperparameters: {'alpha': 1e-15, 'coef0': 6.04e-06, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 0.425116626044487


In [25]:
# Degree-2 polynomial-kernel ridge on X_lexi_nd -> y_delta_energy.
# Linearly spaced grids (endpoints included): alpha in [1e-17, 1e-15],
# coef0 in [1e-6, 1e-5], 51 points each.
# NOTE(review): alpha this small may be at or below float64 resolution relative
# to the kernel-matrix entries, in which case it has no numerical effect - confirm.
poly_KRR = KernelRidge()
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

param_grid = dict(
    alpha=np.linspace(1e-17, 1e-15, num=51, endpoint=True),
    kernel=['poly'],
    degree=[2],
    coef0=np.linspace(1e-6, 1e-5, num=51, endpoint=True),
)

grid_search = GridSearchCV(
    estimator=poly_KRR,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=k_fold,
)

# Every warning raised while fitting is silenced inside this context only;
# that includes any solver conditioning warnings, should there be any.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search.fit(X_lexi_nd, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is a negated MSE: flip the sign, then take the root to get an RMSE.
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

Best Hyperparameters: {'alpha': 1e-17, 'coef0': 6.04e-06, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 0.425116626044487


## Gaussian Kernel ##

### Gaussian_lexi_delta_tot

In [26]:
# Gaussian (RBF) kernel ridge on X_lexi -> y_delta_energy.
# Linearly spaced grids: alpha in [1e-15, 1e-13], gamma in [1e-7, 1e-5], 50 points each.
# NOTE(review): alpha this small may be close to float64 resolution relative to
# the kernel-matrix entries - confirm it still has a numerical effect.
gaussian_KRR = KernelRidge()
k_fold = KFold(n_splits=5, shuffle=True, random_state=42)

param_grid = dict(
    alpha=np.linspace(1e-15, 1e-13, num=50),  # L2 regularization strength
    gamma=np.linspace(1e-7, 1e-5, num=50),    # Gaussian kernel coefficient (controls the kernel width)
    kernel=['rbf'],                           # Restrict the search to the Gaussian (RBF) kernel
)

grid_search = GridSearchCV(
    estimator=gaussian_KRR,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=k_fold,
)

# Every warning raised while fitting is silenced inside this context only;
# that includes any solver conditioning warnings, should there be any.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is a negated MSE: flip the sign, then take the root to get an RMSE.
best_score = np.sqrt(-grid_search.best_score_)

# Report the winning grid point and its cross-validated error
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

Best Hyperparameters: {'alpha': 1e-15, 'gamma': 3.0204081632653064e-07, 'kernel': 'rbf'}
Best Root Mean Squared Error: 1.5241754010954465


In [28]:
# Gaussian (RBF) kernel ridge on X_lexi -> y_delta_energy.
# Linearly spaced grids: alpha in [1e-17, 1e-15], gamma in [2e-7, 4e-7], 50 points each.
# NOTE(review): an RBF kernel has a unit diagonal, so alpha values below roughly
# 1e-16 are likely lost to float64 rounding when added to it - confirm that the
# lower part of this alpha range actually changes the fit.
gaussian_KRR = KernelRidge()
k_fold = KFold(n_splits=5, shuffle=True, random_state=42)

param_grid = dict(
    alpha=np.linspace(1e-17, 1e-15, num=50),  # L2 regularization strength
    gamma=np.linspace(2e-7, 4e-7, num=50),    # Gaussian kernel coefficient (controls the kernel width)
    kernel=['rbf'],                           # Restrict the search to the Gaussian (RBF) kernel
)

grid_search = GridSearchCV(
    estimator=gaussian_KRR,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=k_fold,
)

# Every warning raised while fitting is silenced inside this context only;
# that includes any solver conditioning warnings, should there be any.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is a negated MSE: flip the sign, then take the root to get an RMSE.
best_score = np.sqrt(-grid_search.best_score_)

# Report the winning grid point and its cross-validated error
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

Best Hyperparameters: {'alpha': 1e-17, 'gamma': 2e-07, 'kernel': 'rbf'}
Best Root Mean Squared Error: 1.5241502406409801


### Gaussian_lexi_nd_delta_tot ###

In [16]:
# Small alpha, small gamma
#
# Gaussian (RBF) kernel ridge on X_lexi_nd -> y_delta_energy.
# Log-spaced grids: alpha in [1e-9, 1e-5], gamma in [1e-7, 1e-3], 50 points each.
gaussian_KRR = KernelRidge()
k_fold = KFold(n_splits=5, shuffle=True, random_state=42)

param_grid = dict(
    alpha=np.logspace(np.log10(1e-9), np.log10(1e-5), num=50),  # L2 regularization strength
    gamma=np.logspace(np.log10(1e-7), np.log10(1e-3), num=50),  # Gaussian kernel coefficient (controls the kernel width)
    kernel=['rbf'],                                             # Restrict the search to the Gaussian (RBF) kernel
)

grid_search = GridSearchCV(
    estimator=gaussian_KRR,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=k_fold,
)

# Every warning raised while fitting is silenced inside this context only.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search.fit(X_lexi_nd, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is a negated MSE: flip the sign, then take the root to get an RMSE.
best_score = np.sqrt(-grid_search.best_score_)

# Report the winning grid point and its cross-validated error
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

Best Hyperparameters: {'alpha': 1.2067926406393263e-09, 'gamma': 2.4420530945486497e-06, 'kernel': 'rbf'}
Best Root Mean Squared Error: 0.015033830741354816


In [17]:
# Gaussian (RBF) kernel ridge on X_lexi_nd -> y_delta_energy.
# Linearly spaced grids (endpoints included): alpha in [1e-10, 1e-8],
# gamma in [1e-7, 1e-5], 51 points each.
gaussian_KRR = KernelRidge()
k_fold = KFold(n_splits=5, shuffle=True, random_state=42)

param_grid = dict(
    alpha=np.linspace(1e-10, 1e-8, num=51, endpoint=True),  # L2 regularization strength
    gamma=np.linspace(1e-7, 1e-5, num=51, endpoint=True),   # Gaussian kernel coefficient (controls the kernel width)
    kernel=['rbf'],                                         # Restrict the search to the Gaussian (RBF) kernel
)

grid_search = GridSearchCV(
    estimator=gaussian_KRR,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=k_fold,
)

# Every warning raised while fitting is silenced inside this context only.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search.fit(X_lexi_nd, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is a negated MSE: flip the sign, then take the root to get an RMSE.
best_score = np.sqrt(-grid_search.best_score_)

# Report the winning grid point and its cross-validated error
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

Best Hyperparameters: {'alpha': 1.4860000000000002e-09, 'gamma': 2.278e-06, 'kernel': 'rbf'}
Best Root Mean Squared Error: 0.014988744264583903


In [18]:
# Gaussian (RBF) kernel ridge on X_lexi_nd -> y_delta_energy.
# Linearly spaced grids: alpha in [1e-9, 2e-9], gamma in [2e-6, 3e-6], 50 points each.
gaussian_KRR = KernelRidge()
k_fold = KFold(n_splits=5, shuffle=True, random_state=42)

param_grid = dict(
    alpha=np.linspace(1e-9, 2e-9, num=50),  # L2 regularization strength
    gamma=np.linspace(2e-6, 3e-6, num=50),  # Gaussian kernel coefficient (controls the kernel width)
    kernel=['rbf'],                         # Restrict the search to the Gaussian (RBF) kernel
)

grid_search = GridSearchCV(
    estimator=gaussian_KRR,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=k_fold,
)

# Every warning raised while fitting is silenced inside this context only.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search.fit(X_lexi_nd, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is a negated MSE: flip the sign, then take the root to get an RMSE.
best_score = np.sqrt(-grid_search.best_score_)

# Report the winning grid point and its cross-validated error
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

Best Hyperparameters: {'alpha': 1.5306122448979593e-09, 'gamma': 2.2857142857142856e-06, 'kernel': 'rbf'}
Best Root Mean Squared Error: 0.014988605927057332


## Linear ##

### Linear_lexi_nd_delta ###

In [30]:
# Linear ridge regression on X_lexi_nd -> y_delta_energy.
# Only alpha is tuned: 360 log-spaced values in [1e-7, 1e2].
k_fold = KFold(n_splits=5, shuffle=True, random_state=42)
linear_model = Ridge()

# Candidate values for each tuned hyperparameter
param_grid = dict(
    alpha=np.logspace(np.log10(1e-7), np.log10(1e2), num=360),  # Regularization strength
)

grid_search = GridSearchCV(
    estimator=linear_model,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=k_fold,
)

# Every warning raised while fitting is silenced inside this context only.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search.fit(X_lexi_nd, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is a negated MSE: flip the sign, then take the root to get an RMSE.
best_score = np.sqrt(-grid_search.best_score_)

# Report the winning grid point and its cross-validated error
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

Best Hyperparameters: {'alpha': 0.03676711978711884}
Best Root Mean Squared Error: 0.023452687519466057


In [31]:
# Linear ridge regression on X_lexi_nd -> y_delta_energy.
# Only alpha is tuned: 100 linearly spaced values in [0.02, 0.05].
k_fold = KFold(n_splits=5, shuffle=True, random_state=42)
linear_model = Ridge()

# Candidate values for each tuned hyperparameter
param_grid = dict(
    alpha=np.linspace(0.02, 0.05, num=100),  # Regularization strength
)

grid_search = GridSearchCV(
    estimator=linear_model,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=k_fold,
)

# Every warning raised while fitting is silenced inside this context only.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search.fit(X_lexi_nd, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is a negated MSE: flip the sign, then take the root to get an RMSE.
best_score = np.sqrt(-grid_search.best_score_)

# Report the winning grid point and its cross-validated error
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

Best Hyperparameters: {'alpha': 0.03666666666666667}
Best Root Mean Squared Error: 0.02345266178806411
