In [1]:
import sys
sys.path.append('../..')
sys.path.append('../data')
sys.path.append('../../helper_code')

from sklearn.kernel_ridge import KernelRidge
from sklearn.model_selection import cross_val_score, KFold, GridSearchCV, RandomizedSearchCV
from sklearn.metrics import mean_squared_error
import warnings
import numpy as np
import pandas as pd
from IPython.display import display
import copy

from helper_code.custom_kernel import *

In [2]:
# Names of the feature tables to load; each one maps to '<name>.csv' in the
# coronene training-data directory.
input_dataset = ['c', 'c_lexi', 'CSE', 'CSE_lexi']

# Build the name -> DataFrame lookup in a single pass.
dataset_dict = {
    data: pd.read_csv(f'../data/coronene_training_data/{data}.csv')
    for data in input_dataset
}

# Target tables, read from the same directory.
delta_total_energy = pd.read_csv('../data/coronene_training_data/delta_total_energy.csv')
delta_delta_total_energy = pd.read_csv('../data/coronene_training_data/delta_delta_total_energy.csv')

In [8]:
# Randomized hyperparameter search for an RBF KernelRidge model on the
# 'CSE_lexi' features, scored by 2-fold cross-validated mean squared error.
X_train = dataset_dict['CSE_lexi'].to_numpy()
y_train = delta_delta_total_energy  # passed to fit() exactly as loaded by pd.read_csv

# Candidate values: 101 log-spaced points per parameter between 1e-13 and 1e-3.
param_grid = {
    'alpha': np.logspace(np.log10(1e-13), np.log10(1e-3), num=101),
    'gamma': np.logspace(np.log10(1e-13), np.log10(1e-3), num=101),
    'kernel': ['rbf']
}

model = KernelRidge()
kfold = KFold(n_splits=2, shuffle=True, random_state=42)

with warnings.catch_warnings():
    # Every warning raised while fitting is silenced inside this block.
    # NOTE(review): presumably this hides ill-conditioning warnings at very
    # small alpha -- confirm that is intended.
    warnings.filterwarnings('ignore')
    # random_state pins which 500 (alpha, gamma) candidates are sampled, so a
    # Restart & Run All reproduces the same search and the same printed result.
    grid_search = RandomizedSearchCV(
        model,
        param_grid,
        n_iter=500,
        scoring='neg_mean_squared_error',
        cv=kfold,
        random_state=42,
    )
    grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
# best_score_ is the mean negated MSE over the folds; flip the sign and take
# the square root to report it on the RMSE scale.
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

Best Hyperparameters: {'kernel': 'rbf', 'gamma': 3.9810717055349776e-08, 'alpha': 1e-10}
Best Root Mean Squared Error: 0.1489627308333547


In [9]:
# Exhaustive grid search for an RBF KernelRidge model on the 'CSE_lexi'
# features: 21 log-spaced alpha values in [1e-9, 1e-7] crossed with
# 21 log-spaced gamma values in [1e-11, 1e-9], 2-fold cross-validation.
X_train = dataset_dict['CSE_lexi'].to_numpy()
y_train = delta_delta_total_energy

param_grid = dict(
    alpha=np.logspace(np.log10(1e-9), np.log10(1e-7), num=21),
    gamma=np.logspace(np.log10(1e-11), np.log10(1e-9), num=21),
    kernel=['rbf'],
)

model = KernelRidge()
kfold = KFold(n_splits=2, shuffle=True, random_state=42)

with warnings.catch_warnings():
    # Silence every warning emitted while the search is built and fitted.
    warnings.simplefilter('ignore')
    grid_search = GridSearchCV(
        estimator=model,
        param_grid=param_grid,
        scoring='neg_mean_squared_error',
        cv=kfold,
    )
    grid_search.fit(X_train, y_train)

# The scorer is negated MSE, so negate before taking the square root.
best_score = np.sqrt(-grid_search.best_score_)
best_params = grid_search.best_params_

print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

Best Hyperparameters: {'alpha': 1e-09, 'gamma': 1e-09, 'kernel': 'rbf'}
Best Root Mean Squared Error: 0.15033871067431506


In [10]:
# Exhaustive grid search for an RBF KernelRidge model on the 'CSE_lexi'
# features: 21 log-spaced alpha values in [1e-11, 1e-9] crossed with
# 21 log-spaced gamma values in [1e-9, 1e-7], 2-fold cross-validation.
X_train = dataset_dict['CSE_lexi'].to_numpy()
y_train = delta_delta_total_energy

param_grid = dict(
    alpha=np.logspace(np.log10(1e-11), np.log10(1e-9), num=21),
    gamma=np.logspace(np.log10(1e-9), np.log10(1e-7), num=21),
    kernel=['rbf'],
)

model = KernelRidge()
kfold = KFold(n_splits=2, shuffle=True, random_state=42)

with warnings.catch_warnings():
    # Silence every warning emitted while the search is built and fitted.
    warnings.simplefilter('ignore')
    grid_search = GridSearchCV(
        estimator=model,
        param_grid=param_grid,
        scoring='neg_mean_squared_error',
        cv=kfold,
    )
    grid_search.fit(X_train, y_train)

# The scorer is negated MSE, so negate before taking the square root.
best_score = np.sqrt(-grid_search.best_score_)
best_params = grid_search.best_params_

print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

Best Hyperparameters: {'alpha': 2.5118864315095823e-11, 'gamma': 1.9952623149688786e-08, 'kernel': 'rbf'}
Best Root Mean Squared Error: 0.14895567868685927


In [11]:
# Exhaustive grid search for an RBF KernelRidge model on the 'CSE_lexi'
# features using linearly spaced candidates: 21 alpha values in
# [1e-11, 5e-11] crossed with 21 gamma values in [1e-8, 5e-8],
# 2-fold cross-validation.
X_train = dataset_dict['CSE_lexi'].to_numpy()
y_train = delta_delta_total_energy

param_grid = dict(
    alpha=np.linspace(1e-11, 5e-11, num=21),
    gamma=np.linspace(1e-8, 5e-8, num=21),
    kernel=['rbf'],
)

model = KernelRidge()
kfold = KFold(n_splits=2, shuffle=True, random_state=42)

with warnings.catch_warnings():
    # Silence every warning emitted while the search is built and fitted.
    warnings.simplefilter('ignore')
    grid_search = GridSearchCV(
        estimator=model,
        param_grid=param_grid,
        scoring='neg_mean_squared_error',
        cv=kfold,
    )
    grid_search.fit(X_train, y_train)

# The scorer is negated MSE, so negate before taking the square root.
best_score = np.sqrt(-grid_search.best_score_)
best_params = grid_search.best_params_

print("Best Hyperparameters:", best_params)
print("Best Root Mean Squared Error:", best_score)

Best Hyperparameters: {'alpha': 4.6e-11, 'gamma': 2.8000000000000003e-08, 'kernel': 'rbf'}
Best Root Mean Squared Error: 0.14895286776797315
