In [7]:
from sklearn.linear_model import Ridge
from sklearn.kernel_ridge import KernelRidge
from sklearn.model_selection import cross_val_score, KFold, GridSearchCV, train_test_split
from sklearn.metrics import mean_squared_error
import warnings
import numpy as np
import pandas as pd
from IPython.display import display

import sys
# Add the data directory to the module search path so that modules stored there can be imported.
sys.path.append('../Data')

In [9]:
# Load Data
# Every table is read with pandas' default CSV settings, in the order listed.

# The three X_* feature tables.
X_lexi, X_lexi_opp, X_sorted = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../Data/[Benz] X_lexi.csv',
        "../Data/[Benz] X_lexi_opp.csv",
        "../Data/[Benz] X_sorted.csv",
    )
)

# The four y_* target tables.
y_energy, y_elec, y_delta_elec, y_delta_energy = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../Data/[Benz] y_energy.csv",
        "../Data/[Benz] y_elec.csv",
        "../Data/[Benz] y_delta_elec.csv",
        "../Data/[Benz] y_delta_energy.csv",
    )
)

## Polynomial Kernel ##

### Lexi Mapping ###

#### y_delta_energy ####

In [13]:
# Small alpha, small coef0
# Grid search over polynomial-kernel ridge regression of y_delta_energy on X_lexi.
# 40 log-spaced values per axis: alpha in [1e-7, 1e-3], coef0 in [1e-5, 1e-1];
# degrees 2, 3 and 4.

param_grid = {
    'alpha': np.logspace(np.log10(1e-7), np.log10(1e-3), num=40),
    'kernel': ['poly'],
    'degree': [2, 3, 4],
    'coef0': np.logspace(np.log10(1e-5), np.log10(1e-1), num=40),
}

poly_KRR = KernelRidge()

# Shuffled 5-fold split with a fixed seed so the folds are reproducible.
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

# Record warnings raised during the fits instead of discarding them: the cell
# output stays short, but a summary can still be printed below.
# NOTE(review): numerical warnings from the kernel solve are the likely
# candidates here -- confirm from the summary line.
with warnings.catch_warnings(record=True) as fit_warnings:
    warnings.simplefilter("always")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is negated MSE, so flip the sign before taking the root.
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

# One summary line instead of one line per warning; silent when nothing was raised.
if fit_warnings:
    warning_kinds = sorted({w.category.__name__ for w in fit_warnings})
    print("Warnings raised during search:", len(fit_warnings), warning_kinds)

Best Hyperparameters: {'alpha': 1e-07, 'coef0': 1e-05, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 0.4442300235346188


In [14]:
# Small alpha, large coef0
# Grid search over polynomial-kernel ridge regression of y_delta_energy on X_lexi.
# 40 log-spaced values per axis: alpha in [1e-7, 1e-3], coef0 in [1e-1, 1e3];
# degrees 2, 3 and 4.

param_grid = {
    'alpha': np.logspace(np.log10(1e-7), np.log10(1e-3), num=40),
    'kernel': ['poly'],
    'degree': [2, 3, 4],
    'coef0': np.logspace(np.log10(1e-1), np.log10(1e3), num=40),
}

poly_KRR = KernelRidge()

# Shuffled 5-fold split with a fixed seed so the folds are reproducible.
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

# Record warnings raised during the fits instead of discarding them: the cell
# output stays short, but a summary can still be printed below.
# NOTE(review): numerical warnings from the kernel solve are the likely
# candidates here -- confirm from the summary line.
with warnings.catch_warnings(record=True) as fit_warnings:
    warnings.simplefilter("always")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is negated MSE, so flip the sign before taking the root.
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

# One summary line instead of one line per warning; silent when nothing was raised.
if fit_warnings:
    warning_kinds = sorted({w.category.__name__ for w in fit_warnings})
    print("Warnings raised during search:", len(fit_warnings), warning_kinds)

Best Hyperparameters: {'alpha': 1.2663801734674022e-07, 'coef0': 0.1, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 0.4453004441426715


In [15]:
# Large alpha, small coef0
# Grid search over polynomial-kernel ridge regression of y_delta_energy on X_lexi.
# 40 log-spaced values per axis: alpha in [1e-3, 1e1], coef0 in [1e-5, 1e-1];
# degrees 2, 3 and 4.

param_grid = {
    'alpha': np.logspace(np.log10(1e-3), np.log10(1e1), num=40),
    'kernel': ['poly'],
    'degree': [2, 3, 4],
    'coef0': np.logspace(np.log10(1e-5), np.log10(1e-1), num=40),
}

poly_KRR = KernelRidge()

# Shuffled 5-fold split with a fixed seed so the folds are reproducible.
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

# Record warnings raised during the fits instead of discarding them: the cell
# output stays short, but a summary can still be printed below.
with warnings.catch_warnings(record=True) as fit_warnings:
    warnings.simplefilter("always")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is negated MSE, so flip the sign before taking the root.
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

# One summary line instead of one line per warning; silent when nothing was raised.
if fit_warnings:
    warning_kinds = sorted({w.category.__name__ for w in fit_warnings})
    print("Warnings raised during search:", len(fit_warnings), warning_kinds)

Best Hyperparameters: {'alpha': 0.001, 'coef0': 1e-05, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 0.44423376250340063


In [16]:
# Large alpha, large coef0
# Grid search over polynomial-kernel ridge regression of y_delta_energy on X_lexi.
# 40 log-spaced values per axis: alpha in [1e-3, 1e1], coef0 in [1e-1, 1e3];
# degrees 2, 3 and 4.

param_grid = {
    'alpha': np.logspace(np.log10(1e-3), np.log10(1e1), num=40),
    'kernel': ['poly'],
    'degree': [2, 3, 4],
    'coef0': np.logspace(np.log10(1e-1), np.log10(1e3), num=40),
}

poly_KRR = KernelRidge()

# Shuffled 5-fold split with a fixed seed so the folds are reproducible.
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

# Record warnings raised during the fits instead of discarding them: the cell
# output stays short, but a summary can still be printed below.
with warnings.catch_warnings(record=True) as fit_warnings:
    warnings.simplefilter("always")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is negated MSE, so flip the sign before taking the root.
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

# One summary line instead of one line per warning; silent when nothing was raised.
if fit_warnings:
    warning_kinds = sorted({w.category.__name__ for w in fit_warnings})
    print("Warnings raised during search:", len(fit_warnings), warning_kinds)

Best Hyperparameters: {'alpha': 0.001, 'coef0': 0.1, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 0.4453041728880147


In [17]:
# Linearly spaced grid, degree fixed at 2: polynomial-kernel ridge regression
# of y_delta_energy on X_lexi.
# 40 values per axis: alpha in [1e-8, 1e-6], coef0 in [1e-6, 1e-4].
param_grid = {
    'alpha': np.linspace(1e-8, 1e-6, num=40),
    'kernel': ['poly'],
    'degree': [2],
    'coef0': np.linspace(1e-6, 1e-4, num=40),
}

poly_KRR = KernelRidge()

# Shuffled 5-fold split with a fixed seed so the folds are reproducible.
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

# Record warnings raised during the fits instead of discarding them: the cell
# output stays short, but a summary can still be printed below.
# NOTE(review): with alpha this small the kernel system may be ill-conditioned;
# a non-empty summary is where that would show up -- confirm.
with warnings.catch_warnings(record=True) as fit_warnings:
    warnings.simplefilter("always")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is negated MSE, so flip the sign before taking the root.
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

# One summary line instead of one line per warning; silent when nothing was raised.
if fit_warnings:
    warning_kinds = sorted({w.category.__name__ for w in fit_warnings})
    print("Warnings raised during search:", len(fit_warnings), warning_kinds)

Best Hyperparameters: {'alpha': 1e-08, 'coef0': 1e-06, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 0.4442297841232004


In [18]:
# Linearly spaced grid, degree fixed at 2: polynomial-kernel ridge regression
# of y_delta_energy on X_lexi.
# 40 values per axis: alpha in [1e-10, 1e-8], coef0 in [1e-8, 1e-6].
param_grid = {
    'alpha': np.linspace(1e-10, 1e-8, num=40),
    'kernel': ['poly'],
    'degree': [2],
    'coef0': np.linspace(1e-8, 1e-6, num=40),
}

poly_KRR = KernelRidge()

# Shuffled 5-fold split with a fixed seed so the folds are reproducible.
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

# Record warnings raised during the fits instead of discarding them: the cell
# output stays short, but a summary can still be printed below.
# NOTE(review): with alpha this small the kernel system may be ill-conditioned;
# a non-empty summary is where that would show up -- confirm.
with warnings.catch_warnings(record=True) as fit_warnings:
    warnings.simplefilter("always")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is negated MSE, so flip the sign before taking the root.
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

# One summary line instead of one line per warning; silent when nothing was raised.
if fit_warnings:
    warning_kinds = sorted({w.category.__name__ for w in fit_warnings})
    print("Warnings raised during search:", len(fit_warnings), warning_kinds)

Best Hyperparameters: {'alpha': 1e-10, 'coef0': 1.1153846153846154e-07, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 0.4442033972799054


In [19]:
# Linearly spaced grid, degree fixed at 2: polynomial-kernel ridge regression
# of y_delta_energy on X_lexi.
# 40 values per axis: alpha in [1e-10, 1e-8], coef0 in [1e-6, 1e-4].
param_grid = {
    'alpha': np.linspace(1e-10, 1e-8, num=40),
    'kernel': ['poly'],
    'degree': [2],
    'coef0': np.linspace(1e-6, 1e-4, num=40),
}

poly_KRR = KernelRidge()

# Shuffled 5-fold split with a fixed seed so the folds are reproducible.
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

# Record warnings raised during the fits instead of discarding them: the cell
# output stays short, but a summary can still be printed below.
# NOTE(review): with alpha this small the kernel system may be ill-conditioned;
# a non-empty summary is where that would show up -- confirm.
with warnings.catch_warnings(record=True) as fit_warnings:
    warnings.simplefilter("always")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is negated MSE, so flip the sign before taking the root.
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

# One summary line instead of one line per warning; silent when nothing was raised.
if fit_warnings:
    warning_kinds = sorted({w.category.__name__ for w in fit_warnings})
    print("Warnings raised during search:", len(fit_warnings), warning_kinds)

Best Hyperparameters: {'alpha': 1e-10, 'coef0': 9.238461538461538e-05, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 0.4442023197670601


In [20]:
# Linearly spaced grid, degree fixed at 2: polynomial-kernel ridge regression
# of y_delta_energy on X_lexi.
# 40 values per axis: alpha in [1e-8, 1e-6], coef0 in [1e-8, 1e-6].
param_grid = {
    'alpha': np.linspace(1e-8, 1e-6, num=40),
    'kernel': ['poly'],
    'degree': [2],
    'coef0': np.linspace(1e-8, 1e-6, num=40),
}

poly_KRR = KernelRidge()

# Shuffled 5-fold split with a fixed seed so the folds are reproducible.
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

# Record warnings raised during the fits instead of discarding them: the cell
# output stays short, but a summary can still be printed below.
# NOTE(review): with alpha this small the kernel system may be ill-conditioned;
# a non-empty summary is where that would show up -- confirm.
with warnings.catch_warnings(record=True) as fit_warnings:
    warnings.simplefilter("always")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is negated MSE, so flip the sign before taking the root.
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

# One summary line instead of one line per warning; silent when nothing was raised.
if fit_warnings:
    warning_kinds = sorted({w.category.__name__ for w in fit_warnings})
    print("Warnings raised during search:", len(fit_warnings), warning_kinds)

Best Hyperparameters: {'alpha': 1e-08, 'coef0': 1.623076923076923e-07, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 0.4442296972350603


In [21]:
# Linearly spaced grid, degree fixed at 2: polynomial-kernel ridge regression
# of y_delta_energy on X_lexi.
# alpha: 40 values in [1e-12, 1e-10]; coef0: 80 values in [1e-8, 1e-4].
param_grid = {
    'alpha': np.linspace(1e-12, 1e-10, num=40),
    'kernel': ['poly'],
    'degree': [2],
    'coef0': np.linspace(1e-8, 1e-4, num=80),
}

poly_KRR = KernelRidge()

# Shuffled 5-fold split with a fixed seed so the folds are reproducible.
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

# Record warnings raised during the fits instead of discarding them: the cell
# output stays short, but a summary can still be printed below.
# NOTE(review): with alpha down to 1e-12 the kernel system may be
# ill-conditioned, which would make the CV scores numerically unreliable;
# a non-empty summary is where that would show up -- confirm.
with warnings.catch_warnings(record=True) as fit_warnings:
    warnings.simplefilter("always")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is negated MSE, so flip the sign before taking the root.
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

# One summary line instead of one line per warning; silent when nothing was raised.
if fit_warnings:
    warning_kinds = sorted({w.category.__name__ for w in fit_warnings})
    print("Warnings raised during search:", len(fit_warnings), warning_kinds)

Best Hyperparameters: {'alpha': 1e-12, 'coef0': 0.0001, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 0.43066847166021627


In [22]:
# Linearly spaced grid, degree fixed at 2: polynomial-kernel ridge regression
# of y_delta_energy on X_lexi.
# alpha: 80 values in [1e-14, 1e-12]; coef0: 120 values in [1e-8, 1e-2].
param_grid = {
    'alpha': np.linspace(1e-14, 1e-12, num=80),
    'kernel': ['poly'],
    'degree': [2],
    'coef0': np.linspace(1e-8, 1e-2, num=120),
}

poly_KRR = KernelRidge()

# Shuffled 5-fold split with a fixed seed so the folds are reproducible.
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

# Record warnings raised during the fits instead of discarding them: the cell
# output stays short, but a summary can still be printed below.
# NOTE(review): with alpha down to 1e-14 the kernel system may be
# ill-conditioned, which would make the CV scores numerically unreliable;
# a non-empty summary is where that would show up -- confirm.
with warnings.catch_warnings(record=True) as fit_warnings:
    warnings.simplefilter("always")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is negated MSE, so flip the sign before taking the root.
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

# One summary line instead of one line per warning; silent when nothing was raised.
if fit_warnings:
    warning_kinds = sorted({w.category.__name__ for w in fit_warnings})
    print("Warnings raised during search:", len(fit_warnings), warning_kinds)

Best Hyperparameters: {'alpha': 3.4835443037974683e-13, 'coef0': 0.0024369823529411766, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 0.4254779738326166


In [25]:
# Linearly spaced grid, degree fixed at 2: polynomial-kernel ridge regression
# of y_delta_energy on X_lexi.
# 50 values per axis: alpha in [1e-13, 1e-12], coef0 in [0.001, 0.01].
param_grid = {
    'alpha': np.linspace(1e-13, 1e-12, num=50),
    'kernel': ['poly'],
    'degree': [2],
    'coef0': np.linspace(0.001, 0.01, num=50),
}

poly_KRR = KernelRidge()

# Shuffled 5-fold split with a fixed seed so the folds are reproducible.
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

# Record warnings raised during the fits instead of discarding them: the cell
# output stays short, but a summary can still be printed below.
# NOTE(review): with alpha around 1e-13 the kernel system may be
# ill-conditioned, which would make the CV scores numerically unreliable;
# a non-empty summary is where that would show up -- confirm.
with warnings.catch_warnings(record=True) as fit_warnings:
    warnings.simplefilter("always")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is negated MSE, so flip the sign before taking the root.
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

# One summary line instead of one line per warning; silent when nothing was raised.
if fit_warnings:
    warning_kinds = sorted({w.category.__name__ for w in fit_warnings})
    print("Warnings raised during search:", len(fit_warnings), warning_kinds)

Best Hyperparameters: {'alpha': 1.1836734693877552e-13, 'coef0': 0.007061224489795919, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 0.42661348517878667


In [26]:
# Linearly spaced grid, degree fixed at 2: polynomial-kernel ridge regression
# of y_delta_energy on X_lexi.
# alpha: 50 values in [2e-13, 5e-13]; coef0: 100 values in [0.001, 0.01].
param_grid = {
    'alpha': np.linspace(2e-13, 5e-13, num=50),
    'kernel': ['poly'],
    'degree': [2],
    'coef0': np.linspace(0.001, 0.01, num=100),
}

poly_KRR = KernelRidge()

# Shuffled 5-fold split with a fixed seed so the folds are reproducible.
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

# Record warnings raised during the fits instead of discarding them: the cell
# output stays short, but a summary can still be printed below.
# NOTE(review): with alpha around 1e-13 the kernel system may be
# ill-conditioned, which would make the CV scores numerically unreliable;
# a non-empty summary is where that would show up -- confirm.
with warnings.catch_warnings(record=True) as fit_warnings:
    warnings.simplefilter("always")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is negated MSE, so flip the sign before taking the root.
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

# One summary line instead of one line per warning; silent when nothing was raised.
if fit_warnings:
    warning_kinds = sorted({w.category.__name__ for w in fit_warnings})
    print("Warnings raised during search:", len(fit_warnings), warning_kinds)

Best Hyperparameters: {'alpha': 2e-13, 'coef0': 0.005636363636363636, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 0.4257777435534395


In [29]:
# Linearly spaced grid, degree fixed at 2: polynomial-kernel ridge regression
# of y_delta_energy on X_lexi.
# 50 values per axis: alpha in [3e-13, 4e-13], coef0 in [0.002, 0.003].
param_grid = {
    'alpha': np.linspace(3e-13, 4e-13, num=50),
    'kernel': ['poly'],
    'degree': [2],
    'coef0': np.linspace(0.002, 0.003, num=50),
}

poly_KRR = KernelRidge()

# Shuffled 5-fold split with a fixed seed so the folds are reproducible.
k_fold = KFold(n_splits=5, shuffle=True, random_state=10)

# Record warnings raised during the fits instead of discarding them: the cell
# output stays short, but a summary can still be printed below.
# NOTE(review): with alpha around 1e-13 the kernel system may be
# ill-conditioned, which would make the CV scores numerically unreliable;
# a non-empty summary is where that would show up -- confirm.
with warnings.catch_warnings(record=True) as fit_warnings:
    warnings.simplefilter("always")
    grid_search = GridSearchCV(poly_KRR, param_grid, scoring='neg_mean_squared_error', cv=k_fold)
    grid_search.fit(X_lexi, y_delta_energy)

best_params = grid_search.best_params_
# The scorer is negated MSE, so flip the sign before taking the root.
best_score = np.sqrt(-grid_search.best_score_)

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_score)

# One summary line instead of one line per warning; silent when nothing was raised.
if fit_warnings:
    warning_kinds = sorted({w.category.__name__ for w in fit_warnings})
    print("Warnings raised during search:", len(fit_warnings), warning_kinds)

Best Hyperparameters: {'alpha': 3.428571428571428e-13, 'coef0': 0.002285714285714286, 'degree': 2, 'kernel': 'poly'}
Best RMSE: 0.42964715235000245
