In [1]:
from sklearn.linear_model import Ridge
from sklearn.kernel_ridge import KernelRidge
from sklearn.model_selection import cross_val_score, KFold, GridSearchCV, train_test_split
from sklearn.metrics import mean_squared_error
import warnings
import numpy as np
import pandas as pd
from IPython.display import display

import sys
sys.path.append('../Data')

## Load Data ##

In [2]:
# Load Data

X_lexi = pd.read_csv('../Data/[Benz] X_lexi.csv')
X_lexi_nd = pd.read_csv('../Data/[Benz] X_lexi_nd.csv')
X_sorted = pd.read_csv('../Data/[Benz] X_sorted.csv')
X_concat = pd.read_csv('../Data/[Benz] X_concat.csv')
X_coulomb = pd.read_csv('../Data/[Benz] X_coulomb.csv')

y_energy = pd.read_csv("../Data/[Benz] y_energy.csv")
y_elec = pd.read_csv("../Data/[Benz] y_elec.csv")
y_delta_elec = pd.read_csv("../Data/[Benz] y_delta_elec.csv")
y_delta_energy = pd.read_csv("../Data/[Benz] y_delta_energy.csv")

## Polynomial Kernel ##

### Poly_lexi_delta_tot ###

In [None]:
# Small alpha, small coef0

param_grid = {
    'alpha': np.logspace(np.log10(1e-7), np.log10(1e-3), num=40),
    'kernel': ['poly'],  
    'degree': [2, 3, 4], 
    'coef0': np.logspace(np.log10(1e-5), np.log10(1e-1), num=40), 
}

poly_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
# Small alpha, large coef0

param_grid = {
    'alpha': np.logspace(np.log10(1e-7), np.log10(1e-3), num=40),
    'kernel': ['poly'],  
    'degree': [2, 3, 4], 
    'coef0': np.logspace(np.log10(1e-1), np.log10(1e3), num=40), 
}

poly_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
# large alpha, small coef0

param_grid = {
    'alpha': np.logspace(np.log10(1e-3), np.log10(1e1), num=40),
    'kernel': ['poly'],  
    'degree': [2, 3, 4], 
    'coef0': np.logspace(np.log10(1e-5), np.log10(1e-1), num=40), 
}

poly_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
# large alpha, large coef0

param_grid = {
    'alpha': np.logspace(np.log10(1e-3), np.log10(1e1), num=40),
    'kernel': ['poly'],  
    'degree': [2, 3, 4], 
    'coef0': np.logspace(np.log10(1e-1), np.log10(1e3), num=40), 
}

poly_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
param_grid = {
    'alpha': np.linspace(1e-8, 1e-6, num=40),
    'kernel': ['poly'],  
    'degree': [2], 
    'coef0': np.linspace(1e-6, 1e-4, num=40), 
}

poly_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
param_grid = {
    'alpha': np.linspace(1e-10, 1e-8, num=40),
    'kernel': ['poly'],  
    'degree': [2], 
    'coef0': np.linspace(1e-8, 1e-6, num=40), 
}

poly_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
param_grid = {
    'alpha': np.linspace(1e-10, 1e-8, num=40),
    'kernel': ['poly'],  
    'degree': [2], 
    'coef0': np.linspace(1e-6, 1e-4, num=40), 
}

poly_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
param_grid = {
    'alpha': np.linspace(1e-8, 1e-6, num=40),
    'kernel': ['poly'],  
    'degree': [2], 
    'coef0': np.linspace(1e-8, 1e-6, num=40), 
}

poly_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
param_grid = {
    'alpha': np.linspace(1e-12, 1e-10, num=40),
    'kernel': ['poly'],  
    'degree': [2], 
    'coef0': np.linspace(1e-8, 1e-4, num=80), 
}

poly_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
param_grid = {
    'alpha': np.linspace(1e-14, 1e-12, num=80),
    'kernel': ['poly'],  
    'degree': [2], 
    'coef0': np.linspace(1e-8, 1e-2, num=120), 
}

poly_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
param_grid = {
    'alpha': np.linspace(1e-13, 1e-12, num=50),
    'kernel': ['poly'],  
    'degree': [2], 
    'coef0': np.linspace(0.001, 0.01, num=50), 
}

poly_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
param_grid = {
    'alpha': np.linspace(2e-13, 5e-13, num=50),
    'kernel': ['poly'],  
    'degree': [2], 
    'coef0': np.linspace(0.001, 0.01, num=100), 
}

poly_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
param_grid = {
    'alpha': np.linspace(3e-13, 4e-13, num=50),
    'kernel': ['poly'],  
    'degree': [2], 
    'coef0': np.linspace(0.002, 0.003, num=50), 
}

poly_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

### Poly_lexi_nd_delta_tot ###

In [None]:
param_grid = {
    'alpha': np.logspace(np.log10(1e-7), np.log10(1e-3), num=50),
    'kernel': ['poly'],  
    'degree': [2, 3, 4], 
    'coef0': np.logspace(np.log10(1e-5), np.log10(1e-1), num=50), 
}

poly_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi_nd, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
param_grid = {
    'alpha': np.linspace(1e-9, 1e-7, num=51, endpoint=True),
    'kernel': ['poly'],  
    'degree': [2], 
    'coef0': np.linspace(1e-7, 1e-5, num=51, endpoint=True), 
}

poly_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi_nd, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
param_grid = {
    'alpha': np.linspace(1e-11, 1e-9, num=51, endpoint=True),
    'kernel': ['poly'],  
    'degree': [2], 
    'coef0': np.linspace(1e-6, 2e-6, num=51, endpoint=True), 
}

poly_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi_nd, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
param_grid = {
    'alpha': np.linspace(1e-13, 1e-11, num=51, endpoint=True),
    'kernel': ['poly'],  
    'degree': [2], 
    'coef0': np.linspace(1e-7, 1e-5, num=51, endpoint=True), 
}

poly_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi_nd, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
param_grid = {
    'alpha': np.linspace(1e-15, 1e-13, num=51, endpoint=True),
    'kernel': ['poly'],  
    'degree': [2], 
    'coef0': np.linspace(1e-7, 1e-5, num=51, endpoint=True), 
}

poly_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi_nd, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
param_grid = {
    'alpha': np.linspace(1e-17, 1e-15, num=51, endpoint=True),
    'kernel': ['poly'],  
    'degree': [2], 
    'coef0': np.linspace(1e-6, 1e-5, num=51, endpoint=True), 
}

poly_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi_nd, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

### Poly_sorted_delta_tot ###

In [None]:
param_grid = {
    'alpha': np.linspace(1e-8, 1e-6, num=40),
    'kernel': ['poly'],  
    'degree': [2], 
    'coef0': np.linspace(1e-6, 1e-4, num=40), 
}

poly_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_sorted, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
param_grid = {
    'alpha': np.linspace(1e-10, 1e-8, num=40),
    'kernel': ['poly'],  
    'degree': [2], 
    'coef0': np.linspace(1e-6, 1e-4, num=40), 
}

poly_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_sorted, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
param_grid = {
    'alpha': np.linspace(1e-12, 1e-10, num=40),
    'kernel': ['poly'],  
    'degree': [2], 
    'coef0': np.linspace(1e-6, 1e-4, num=40), 
}

poly_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_sorted, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
param_grid = {
    'alpha': np.linspace(1e-14, 1e-12, num=40),
    'kernel': ['poly'],  
    'degree': [2], 
    'coef0': np.linspace(1e-6, 1e-4, num=40), 
}

poly_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_sorted, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

In [None]:
param_grid = {
    'alpha': np.linspace(6e-13, 9e-13, num=40),
    'kernel': ['poly'],  
    'degree': [2], 
    'coef0': np.linspace(3e-5, 5e-5, num=40), 
}

poly_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_sorted, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

### Poly_coulomb_delta_tot ###

In [9]:
param_grid = {
    'alpha': np.logspace(np.log10(1e-7), np.log10(1e-3), num=50),
    'kernel': ['poly'],  
    'degree': [2], 
    'coef0': np.logspace(np.log10(1e-5), np.log10(1e-1), num=50), 
}

poly_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_coulomb, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

Best Hyperparameters: {'alpha': 0.001, 'coef0': 0.002811768697974228, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 2.84221919442246


In [11]:
param_grid = {
    'alpha': np.linspace(0.001, 0.1, num=50),
    'kernel': ['poly'],  
    'degree': [2], 
    'coef0': np.linspace(0.001, 0.01, num=50), 
}

poly_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_coulomb, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

Best Hyperparameters: {'alpha': 0.1, 'coef0': 0.01, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 1.619107598810548


In [12]:
param_grid = {
    'alpha': np.linspace(0.1, 1, num=50),
    'kernel': ['poly'],  
    'degree': [2], 
    'coef0': np.linspace(0.01, 0.1, num=50), 
}

poly_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_coulomb, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

Best Hyperparameters: {'alpha': 0.11836734693877551, 'coef0': 0.1, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 1.6123157382172415


In [13]:
param_grid = {
    'alpha': np.linspace(0.1, 0.2, num=50),
    'kernel': ['poly'],  
    'degree': [2], 
    'coef0': np.linspace(0.1, 1, num=50), 
}

poly_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_coulomb, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

Best Hyperparameters: {'alpha': 0.14489795918367349, 'coef0': 1.0, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 1.5984905965248137


In [14]:
param_grid = {
    'alpha': np.linspace(0.1, 0.2, num=50),
    'kernel': ['poly'],  
    'degree': [2], 
    'coef0': np.linspace(1, 10, num=50), 
}

poly_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_coulomb, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

Best Hyperparameters: {'alpha': 0.1489795918367347, 'coef0': 1.183673469387755, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 1.5982699182575266


## Gaussian Kernel ##

### Gaussian_lexi_delta_tot

In [None]:
param_grid = {
    'alpha': np.linspace(1e-15, 1e-13, num=50),  # Regularization parameter controlling the L2 regularization term
    'gamma': np.linspace(1e-7, 1e-5, num=50),  # Parameter for the Gaussian kernel, controlling the width of the kernel
    'kernel': ['rbf'],  # Specifies the kernel function to be used, in this case, the Gaussian (RBF) kernel
}

gaussian_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=42)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(gaussian_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

# Print the best hyperparameters and score
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

In [None]:
param_grid = {
    'alpha': np.linspace(1e-17, 1e-15, num=50),  # Regularization parameter controlling the L2 regularization term
    'gamma': np.linspace(2e-7, 4e-7, num=50),  # Parameter for the Gaussian kernel, controlling the width of the kernel
    'kernel': ['rbf'],  # Specifies the kernel function to be used, in this case, the Gaussian (RBF) kernel
}

gaussian_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=42)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(gaussian_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

# Print the best hyperparameters and score
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

### Gaussian_lexi_nd_delta_tot ###

In [15]:
# Small alpha, small gamma

param_grid = {
    'alpha': np.logspace(np.log10(1e-9), np.log10(1e-5), num=50),  # Regularization parameter controlling the L2 regularization term
    'gamma': np.logspace(np.log10(1e-7), np.log10(1e-3), num=50),  # Parameter for the Gaussian kernel, controlling the width of the kernel
    'kernel': ['rbf'],  # Specifies the kernel function to be used, in this case, the Gaussian (RBF) kernel
}

gaussian_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=42)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(gaussian_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi_nd, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

# Print the best hyperparameters and score
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

Best Hyperparameters: {'alpha': 3.556480306223129e-08, 'gamma': 1.6768329368110065e-06, 'kernel': 'rbf'}
Best Root Mean Squared Error: 0.037437385101272584


In [16]:
param_grid = {
    'alpha': np.linspace(1e-9, 1e-7, num=51, endpoint=True),  # Regularization parameter controlling the L2 regularization term
    'gamma': np.linspace(1e-7, 1e-5, num=51, endpoint=True),  # Parameter for the Gaussian kernel, controlling the width of the kernel
    'kernel': ['rbf'],  # Specifies the kernel function to be used, in this case, the Gaussian (RBF) kernel
}

gaussian_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=42)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(gaussian_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi_nd, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

# Print the best hyperparameters and score
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

Best Hyperparameters: {'alpha': 3.664000000000001e-08, 'gamma': 1.6839999999999999e-06, 'kernel': 'rbf'}
Best Root Mean Squared Error: 0.03743709725344218


In [17]:
param_grid = {
    'alpha': np.linspace(1e-8, 1e-7, num=50),  # Regularization parameter controlling the L2 regularization term
    'gamma': np.linspace(1e-6, 5e-6, num=50),  # Parameter for the Gaussian kernel, controlling the width of the kernel
    'kernel': ['rbf'],  # Specifies the kernel function to be used, in this case, the Gaussian (RBF) kernel
}

gaussian_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=42)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(gaussian_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi_nd, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

# Print the best hyperparameters and score
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

Best Hyperparameters: {'alpha': 3.755102040816326e-08, 'gamma': 1.7346938775510206e-06, 'kernel': 'rbf'}
Best Root Mean Squared Error: 0.03743287836529461


### Gaussian_sorted_delta_tot ###

In [None]:
# Small alpha, small gamma

param_grid = {
    'alpha': np.logspace(np.log10(1e-9), np.log10(1e-5), num=50),  # Regularization parameter controlling the L2 regularization term
    'gamma': np.logspace(np.log10(1e-7), np.log10(1e-3), num=50),  # Parameter for the Gaussian kernel, controlling the width of the kernel
    'kernel': ['rbf'],  # Specifies the kernel function to be used, in this case, the Gaussian (RBF) kernel
}

gaussian_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=42)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(gaussian_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_sorted, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

# Print the best hyperparameters and score
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

In [None]:
param_grid = {
    'alpha': np.linspace(1e-5, 1e-3, num=50),  # Regularization parameter controlling the L2 regularization term
    'gamma': np.linspace(1e-6, 1e-4, num=50),  # Parameter for the Gaussian kernel, controlling the width of the kernel
    'kernel': ['rbf'],  # Specifies the kernel function to be used, in this case, the Gaussian (RBF) kernel
}

gaussian_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=42)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(gaussian_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_sorted, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

# Print the best hyperparameters and score
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

In [None]:
param_grid = {
    'alpha': np.linspace(4e-5, 6e-5, num=50),  # Regularization parameter controlling the L2 regularization term
    'gamma': np.linspace(7e-5, 1e-4, num=50),  # Parameter for the Gaussian kernel, controlling the width of the kernel
    'kernel': ['rbf'],  # Specifies the kernel function to be used, in this case, the Gaussian (RBF) kernel
}

gaussian_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=42)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(gaussian_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_sorted, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

# Print the best hyperparameters and score
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

### Gaussian_coulombic_delta_tot ###

In [5]:
param_grid = {
    'alpha': np.logspace(np.log10(1e-9), np.log10(1e-5), num=50),  # Regularization parameter controlling the L2 regularization term
    'gamma': np.logspace(np.log10(1e-7), np.log10(1e-3), num=50),  # Parameter for the Gaussian kernel, controlling the width of the kernel
    'kernel': ['rbf'],  # Specifies the kernel function to be used, in this case, the Gaussian (RBF) kernel
}

gaussian_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(gaussian_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_coulomb, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

# Print the best hyperparameters and score
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

Best Hyperparameters: {'alpha': 1e-09, 'gamma': 6.250551925273976e-06, 'kernel': 'rbf'}
Best Root Mean Squared Error: 1.8380127173508423


In [6]:
param_grid = {
    'alpha': np.linspace(1e-10, 1e-9, num=50),  # Regularization parameter controlling the L2 regularization term
    'gamma': np.linspace(7e-5, 5e-5, num=50),  # Parameter for the Gaussian kernel, controlling the width of the kernel
    'kernel': ['rbf'],  # Specifies the kernel function to be used, in this case, the Gaussian (RBF) kernel
}

gaussian_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=42)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(gaussian_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_coulomb, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

# Print the best hyperparameters and score
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

Best Hyperparameters: {'alpha': 1e-09, 'gamma': 7e-05, 'kernel': 'rbf'}
Best Root Mean Squared Error: 3.2524862646881463


In [8]:
param_grid = {
    'alpha': np.linspace(1e-10, 1e-9, num=50),  # Regularization parameter controlling the L2 regularization term
    'gamma': np.linspace(1e-7, 1e-5, num=50),  # Parameter for the Gaussian kernel, controlling the width of the kernel
    'kernel': ['rbf'],  # Specifies the kernel function to be used, in this case, the Gaussian (RBF) kernel
}

gaussian_KRR = KernelRidge()

k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(gaussian_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_coulomb, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

# Print the best hyperparameters and score
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

Best Hyperparameters: {'alpha': 1e-10, 'gamma': 2.120408163265306e-06, 'kernel': 'rbf'}
Best Root Mean Squared Error: 1.83574791846835


## Linear ##

### Linear_lexi_nd_delta ###

In [None]:
linear_model = Ridge()

# Define the hyperparameters to tune and their respective values
param_grid = {
    'alpha': np.logspace(np.log10(1e-7), np.log10(1e2), num=360),  # Regularization strength
}

k_fold = KFold(n_splits=5, shuffle=True, random_state=42)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(linear_model, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi_nd, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

# Print the best hyperparameters and score
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

In [None]:
linear_model = Ridge()

# Define the hyperparameters to tune and their respective values
param_grid = {
    'alpha': np.linspace(0.02, 0.05, num=100),  # Regularization strength
}

k_fold = KFold(n_splits=5, shuffle=True, random_state=42)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    grid_search = GridSearchCV(linear_model, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi_nd, y_delta_energy)

best_params = grid_search.best_params_
best_score = np.sqrt(-grid_search.best_score_)

# Print the best hyperparameters and score
print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)