In [52]:
from sklearn.linear_model import Ridge
from sklearn.kernel_ridge import KernelRidge
from sklearn.model_selection import cross_val_score, KFold, GridSearchCV, train_test_split
from sklearn.metrics import mean_squared_error
import warnings
import numpy as np
import pandas as pd
from IPython.display import display
import copy


import sys
sys.path.append('../Data')

## Load Dataset ##

In [2]:
# Load Data

# Single place to change if the Benz CSV files are moved.
DATA_DIR = '../Data'

# Candidate feature tables; each file holds a different variant of the inputs
# (what distinguishes the variants is not visible here — TODO document).
X = pd.read_csv(f'{DATA_DIR}/[Benz] X.csv')
X_lexi = pd.read_csv(f'{DATA_DIR}/[Benz] X_lexi.csv')
X_lexi_nd = pd.read_csv(f'{DATA_DIR}/[Benz] X_lexi_nd.csv')
X_sorted = pd.read_csv(f'{DATA_DIR}/[Benz] X_sorted.csv')
X_concat = pd.read_csv(f'{DATA_DIR}/[Benz] X_concat.csv')
X_coulomb = pd.read_csv(f'{DATA_DIR}/[Benz] X_coulomb.csv')

# Candidate regression targets; the cells below fit on y_delta_energy.
y_energy = pd.read_csv(f'{DATA_DIR}/[Benz] y_energy.csv')
y_elec = pd.read_csv(f'{DATA_DIR}/[Benz] y_elec.csv')
y_delta_energy = pd.read_csv(f'{DATA_DIR}/[Benz] y_delta_energy.csv')
y_delta_elec = pd.read_csv(f'{DATA_DIR}/[Benz] y_delta_elec.csv')

In [43]:
# Inspect the regression target; display() renders the DataFrame with its rich
# (HTML table) representation instead of the plain-text repr that print() gives.
display(y_delta_energy)

    delta total energy
0             2.576317
1             2.515439
2             2.520922
3             5.759256
4             5.796641
5             5.872848
6             5.683093
7             5.962955
8             5.901222
9             5.781881
10            5.883617
11            5.896423
12            5.857989
13            5.897367
14            8.980319
15            9.212556
16            9.402806


## Define Kernel ##

In [47]:
def extended_gaussian_kernel(x, y, params):
    """
    Calculates the similarity between two vectors using an extended gaussian kernel.
    The kernel takes into account distance between vectors, norm difference, and angular difference:

        phi = exp(-gamma * ||x - y||^2 / 2
                  - epsilon * (||x|| - ||y||)^2
                  - beta * (1 - cos(theta)^2))

    where theta is the angle between x and y. Because the angular term uses
    cos(theta)^2, parallel and anti-parallel vectors both get zero angular penalty.

    Args:
        x (numpy.ndarray): Input vector x (1-D; any array-like is accepted).
        y (numpy.ndarray): Input vector y (1-D, same length as x).
        params (dict): Dictionary of hyperparameters:
        - gamma (float): Hyperparameter for the distance term.
        - epsilon (float): Hyperparameter for the norm difference term.
        - beta (float): Hyperparameter for the angular difference term.

    Returns:
        float: Similarity value between the input vectors. For non-negative
        hyperparameters this lies in (0, 1], and is 1 when x equals y.

    Raises:
        KeyError: If 'gamma', 'epsilon' or 'beta' is missing from params.

    """
    gamma = params['gamma']
    epsilon = params['epsilon']
    beta = params['beta']

    # Work on float arrays so list inputs and integer dtypes behave like floats.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    x_norm = np.linalg.norm(x)
    y_norm = np.linalg.norm(y)
    distance = np.linalg.norm(x - y)

    norm_product = x_norm * y_norm
    if norm_product == 0:
        # The angle to a zero vector is undefined (0/0 would yield NaN and poison
        # the whole kernel matrix), so the angular term contributes no penalty.
        cos_theta = 1.0
    else:
        # Rounding can push |cos| marginally above 1, which would make the
        # angular penalty negative; clamp to the valid range.
        cos_theta = np.clip(np.dot(x, y) / norm_product, -1.0, 1.0)

    phi = np.exp(-gamma * (distance**2)/2 - epsilon * (x_norm - y_norm)**2 - beta * (1 - cos_theta**2))
    return phi


## Build Model ##

### Extended Gaussian Kernel ###

In [45]:
def create_similarity_matrix(X_ref, X_query, similarity_kernel, params):
    """
    Evaluate a pairwise similarity kernel between two sets of samples.

    Entry [i, j] of the result is similarity_kernel(X_ref[i], X_query[j], params).

    Args:
        X_ref (numpy.ndarray): Reference samples, one per row.
        X_query (numpy.ndarray): Query samples, one per row, compared against X_ref.
        similarity_kernel (function): Callable taking (vector, vector, params) and
            returning a scalar similarity.
        params (dict): Hyperparameters forwarded unchanged to similarity_kernel.

    Returns:
        numpy.ndarray: Float matrix of shape (X_ref.shape[0], X_query.shape[0]).

    """
    n_ref, n_query = X_ref.shape[0], X_query.shape[0]
    gram = np.zeros((n_ref, n_query))
    # np.ndindex walks the index grid row by row (last index fastest), i.e. the
    # same order as two nested range() loops.
    for row, col in np.ndindex(n_ref, n_query):
        gram[row, col] = similarity_kernel(X_ref[row], X_query[col], params)
    return gram

In [50]:
# Convert data to numpy arrays
X_train = X.to_numpy()
y_train = y_delta_energy.to_numpy()

# Define the hyperparameters for the specialized kernel
params = {'gamma': 1e-6, 'epsilon': 0.005, 'beta': 0.005}

# Precompute the train-vs-train kernel matrix; it is passed to cross_val_score in place of the raw features
similarity_matrix = create_similarity_matrix(X_train, X_train, extended_gaussian_kernel, params)

# Create an instance of KernelRidge that consumes the precomputed kernel matrix
krr_model = KernelRidge(kernel='precomputed', alpha=1e-8)

# Create a KFold object for 5-fold cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Perform cross-validation; the scorer returns the negated MSE of each fold,
# so flip the sign before taking the square root to obtain per-fold RMSE
mse_scores = cross_val_score(krr_model, similarity_matrix, y_train, scoring='neg_mean_squared_error', cv=kf)
rmse_scores = np.sqrt(-mse_scores)

# Calculate the average RMSE across all folds
avg_rmse = rmse_scores.mean()

# Print the RMSE for each fold
for fold, rmse in enumerate(rmse_scores):
    print(f"Fold {fold+1}: RMSE = {rmse}")

# Print the average RMSE
print(f"Average RMSE across all folds: {avg_rmse}")


Fold 1: RMSE = 0.05034591040032604
Fold 2: RMSE = 0.057066123436842815
Fold 3: RMSE = 0.09413115436032296
Fold 4: RMSE = 0.03467978678400804
Fold 5: RMSE = 0.026288322577963315
Average RMSE across all folds: 0.052502259511892635


### Normal Gaussian Kernel ###

In [48]:
# Baseline: kernel ridge regression with scikit-learn's built-in Gaussian (RBF) kernel
X_train = X.to_numpy()
y_train = y_delta_energy.to_numpy()

params = {'alpha': 1.5306122448979593e-09, 'gamma': 2.2857142857142856e-06, 'kernel': 'rbf'}
KRR_model = KernelRidge(**params)

# NOTE(review): this baseline uses 2 folds while the extended-kernel model is
# scored with 5 folds, so the two RMSE figures are not directly comparable —
# confirm whether the difference is intentional.
k_fold = KFold(n_splits=2, shuffle=True, random_state=42)

# The scorer returns the negated MSE of each fold; flip the sign and take the
# square root to obtain per-fold RMSE
mse_scores = cross_val_score(KRR_model, X_train, y_train, scoring='neg_mean_squared_error', cv=k_fold)
rmse_scores = np.sqrt(-mse_scores)

# Calculate the average RMSE across all folds
avg_rmse = rmse_scores.mean()

# Print the RMSE for each fold (the kernel is 'rbf', not polynomial)
print("Gaussian (RBF) KRR:")
for fold, rmse in enumerate(rmse_scores):
    print(f"Fold {fold+1}: RMSE = {rmse}")

# Print the average RMSE (the value is an RMSE, so label it as such)
print(f"Average RMSE across all folds: {avg_rmse}")

Polynomial KRR:
Fold 1: RMSE = 3.4261441483305215
Fold 2: RMSE = 2.9072305118664477
Average MSE across all folds: 3.1666873300984846


## Tuning ##

In [32]:
from itertools import product

def generate_parameter_combinations(param_grid):
    """
    Expand a parameter grid into the list of every parameter assignment.

    The Cartesian product of the value lists is taken in the dictionary's key
    order, so the last key varies fastest. Each element of the product is paired
    back with the parameter names to form one dictionary.

    Parameters:
        param_grid (dict): Maps each parameter name to an iterable of candidate values.

    Returns:
        list: One dict per combination, each mapping every parameter name to a
        single value. An empty grid yields a list holding one empty dict.
    """
    names = list(param_grid)
    candidate_lists = [param_grid[name] for name in names]

    combinations = []
    for chosen in product(*candidate_lists):
        combinations.append(dict(zip(names, chosen)))
    return combinations

# Example usage: a 3 x 3 x 3 grid expands to 27 combinations, printed one per line.
# The values below are only for demonstration; the tuning cells define their own grids.
param_grid = {
    'gamma': [0.1, 0.5, 1.0],
    'epsilon': [0.2, 0.4, 0.6],
    'alpha': [0.1, 0.5, 1.0]
}

# The last key ('alpha') varies fastest in the printed sequence.
parameter_combinations = generate_parameter_combinations(param_grid)
for params in parameter_combinations:
    print(params)

{'gamma': 0.1, 'epsilon': 0.2, 'alpha': 0.1}
{'gamma': 0.1, 'epsilon': 0.2, 'alpha': 0.5}
{'gamma': 0.1, 'epsilon': 0.2, 'alpha': 1.0}
{'gamma': 0.1, 'epsilon': 0.4, 'alpha': 0.1}
{'gamma': 0.1, 'epsilon': 0.4, 'alpha': 0.5}
{'gamma': 0.1, 'epsilon': 0.4, 'alpha': 1.0}
{'gamma': 0.1, 'epsilon': 0.6, 'alpha': 0.1}
{'gamma': 0.1, 'epsilon': 0.6, 'alpha': 0.5}
{'gamma': 0.1, 'epsilon': 0.6, 'alpha': 1.0}
{'gamma': 0.5, 'epsilon': 0.2, 'alpha': 0.1}
{'gamma': 0.5, 'epsilon': 0.2, 'alpha': 0.5}
{'gamma': 0.5, 'epsilon': 0.2, 'alpha': 1.0}
{'gamma': 0.5, 'epsilon': 0.4, 'alpha': 0.1}
{'gamma': 0.5, 'epsilon': 0.4, 'alpha': 0.5}
{'gamma': 0.5, 'epsilon': 0.4, 'alpha': 1.0}
{'gamma': 0.5, 'epsilon': 0.6, 'alpha': 0.1}
{'gamma': 0.5, 'epsilon': 0.6, 'alpha': 0.5}
{'gamma': 0.5, 'epsilon': 0.6, 'alpha': 1.0}
{'gamma': 1.0, 'epsilon': 0.2, 'alpha': 0.1}
{'gamma': 1.0, 'epsilon': 0.2, 'alpha': 0.5}
{'gamma': 1.0, 'epsilon': 0.2, 'alpha': 1.0}
{'gamma': 1.0, 'epsilon': 0.4, 'alpha': 0.1}
{'gamma': 

In [None]:
# Scratch check: nine log-spaced values from 1e-7 to 1e-3 (two per decade).
# The result is only displayed, not assigned to a variable.
np.logspace(np.log10(1e-7), np.log10(1e-3), num=9)

In [54]:
# Convert the pandas DataFrames to numpy arrays
X_train = X.to_numpy()
y_train = y_delta_energy.to_numpy()

# Define the hyperparameter grid (coarse first pass)
param_grid = {
    'gamma': [1e-7, 5e-7, 1e-6, 5e-6, 1e-5, 5e-5, 1e-4],
    'epsilon': [1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1, 1],
    'beta': [1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1, 1],
    'alpha': [1e-9, 5e-9, 1e-8, 5e-8, 1e-7, 5e-7, 1e-6]
}

#########################################################################################################

# 'alpha' is the ridge regularization strength, not a kernel hyperparameter, so
# the kernel matrix depends only on the remaining keys. Build it once per kernel
# setting and reuse it for every alpha instead of recomputing it each time.
kernel_grid = {name: values for name, values in param_grid.items() if name != 'alpha'}

# With an integer random_state every split() call yields the same folds, so a
# single KFold object can be shared by all evaluations.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

best_mean_score = np.inf
best_params = None

# Iterate through every kernel setting; alpha varies fastest, which matches the
# visiting order of the full Cartesian product when 'alpha' is the last grid key
# (so ties are resolved the same way).
for params in generate_parameter_combinations(kernel_grid):
    similarity_matrix = create_similarity_matrix(X_train, X_train, extended_gaussian_kernel, params)

    for alpha in param_grid['alpha']:
        # Full parameter set (kernel + regularization) kept for reporting
        param_copy = {**params, 'alpha': alpha}
        krr_model = KernelRidge(kernel='precomputed', alpha=alpha)

        # The scorer returns negated MSE per fold; convert to RMSE and average
        mse_scores = cross_val_score(krr_model, similarity_matrix, y_train, scoring='neg_mean_squared_error', cv=kf)
        rmse_scores = np.sqrt(-mse_scores)
        avg_rmse = rmse_scores.mean()

        # Strict '<' keeps the first combination that reaches the minimum
        if avg_rmse < best_mean_score:
            best_mean_score = avg_rmse
            best_params = param_copy

print(f"Best params: {best_params}")
print(f"Best score: {best_mean_score}")


Best params: {'gamma': 1e-06, 'epsilon': 0.005, 'beta': 0.001, 'alpha': 1e-09}
Best score: 0.045494072588321555


In [55]:
# Define the hyperparameter grid (refined around the previous best point)
param_grid = {
    'gamma': np.logspace(np.log10(5e-7), np.log10(2e-6), num=8),
    'epsilon': np.logspace(np.log10(1e-3), np.log10(1e-2), num=8),
    'beta': np.logspace(np.log10(1e-5), np.log10(1e-3), num=9),
    'alpha': np.logspace(np.log10(1e-11), np.log10(1e-9), num=9)
}

#########################################################################################################

# 'alpha' is the ridge regularization strength, not a kernel hyperparameter, so
# the kernel matrix depends only on the remaining keys. Build it once per kernel
# setting and reuse it for every alpha instead of recomputing it each time.
kernel_grid = {name: values for name, values in param_grid.items() if name != 'alpha'}

# With an integer random_state every split() call yields the same folds, so a
# single KFold object can be shared by all evaluations.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

best_mean_score = np.inf
best_param = None

# Iterate through every kernel setting; alpha varies fastest, which matches the
# visiting order of the full Cartesian product when 'alpha' is the last grid key
# (so ties are resolved the same way).
for params in generate_parameter_combinations(kernel_grid):
    similarity_matrix = create_similarity_matrix(X_train, X_train, extended_gaussian_kernel, params)

    for alpha in param_grid['alpha']:
        # Full parameter set (kernel + regularization) kept for reporting
        param_copy = {**params, 'alpha': alpha}
        krr_model = KernelRidge(kernel='precomputed', alpha=alpha)

        # The scorer returns negated MSE per fold; convert to RMSE and average
        mse_scores = cross_val_score(krr_model, similarity_matrix, y_train, scoring='neg_mean_squared_error', cv=kf)
        rmse_scores = np.sqrt(-mse_scores)
        avg_rmse = rmse_scores.mean()

        # Strict '<' keeps the first combination that reaches the minimum
        if avg_rmse < best_mean_score:
            best_mean_score = avg_rmse
            best_param = param_copy

print(best_param)
print(best_mean_score)


{'gamma': 5e-07, 'epsilon': 0.0013894954943731374, 'beta': 0.00017782794100389227, 'alpha': 1e-11}
0.03948174530461561


In [56]:
# Define the hyperparameter grid (refined around the previous best point)
param_grid = {
    'gamma': np.logspace(np.log10(1e-7), np.log10(1e-6), num=8),
    'epsilon': np.logspace(np.log10(5e-4), np.log10(5e-3), num=8),
    'beta': np.logspace(np.log10(5e-5), np.log10(5e-4), num=8),
    'alpha': np.logspace(np.log10(1e-13), np.log10(1e-11), num=9)
}

#########################################################################################################

# 'alpha' is the ridge regularization strength, not a kernel hyperparameter, so
# the kernel matrix depends only on the remaining keys. Build it once per kernel
# setting and reuse it for every alpha instead of recomputing it each time.
kernel_grid = {name: values for name, values in param_grid.items() if name != 'alpha'}

# With an integer random_state every split() call yields the same folds, so a
# single KFold object can be shared by all evaluations.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

best_mean_score = np.inf
best_param = None

# Iterate through every kernel setting; alpha varies fastest, which matches the
# visiting order of the full Cartesian product when 'alpha' is the last grid key
# (so ties are resolved the same way).
for params in generate_parameter_combinations(kernel_grid):
    similarity_matrix = create_similarity_matrix(X_train, X_train, extended_gaussian_kernel, params)

    for alpha in param_grid['alpha']:
        # Full parameter set (kernel + regularization) kept for reporting
        param_copy = {**params, 'alpha': alpha}
        krr_model = KernelRidge(kernel='precomputed', alpha=alpha)

        # The scorer returns negated MSE per fold; convert to RMSE and average
        mse_scores = cross_val_score(krr_model, similarity_matrix, y_train, scoring='neg_mean_squared_error', cv=kf)
        rmse_scores = np.sqrt(-mse_scores)
        avg_rmse = rmse_scores.mean()

        # Strict '<' keeps the first combination that reaches the minimum
        if avg_rmse < best_mean_score:
            best_mean_score = avg_rmse
            best_param = param_copy

print(best_param)
print(best_mean_score)

{'gamma': 1e-07, 'epsilon': 0.0009653488644416247, 'beta': 4.9999999999999996e-05, 'alpha': 1e-13}
0.03803322661897801


In [57]:
# Define the hyperparameter grid (refined around the previous best point)
param_grid = {
    'gamma': np.logspace(np.log10(1e-8), np.log10(1e-7), num=8),
    'epsilon': np.logspace(np.log10(7e-4), np.log10(3e-3), num=8),
    'beta': np.logspace(np.log10(1e-5), np.log10(1e-4), num=8),
    'alpha': np.logspace(np.log10(1e-15), np.log10(1e-13), num=9)
}

#########################################################################################################

# 'alpha' is the ridge regularization strength, not a kernel hyperparameter, so
# the kernel matrix depends only on the remaining keys. Build it once per kernel
# setting and reuse it for every alpha instead of recomputing it each time.
kernel_grid = {name: values for name, values in param_grid.items() if name != 'alpha'}

# With an integer random_state every split() call yields the same folds, so a
# single KFold object can be shared by all evaluations.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

best_mean_score = np.inf
best_param = None

# Iterate through every kernel setting; alpha varies fastest, which matches the
# visiting order of the full Cartesian product when 'alpha' is the last grid key
# (so ties are resolved the same way).
for params in generate_parameter_combinations(kernel_grid):
    similarity_matrix = create_similarity_matrix(X_train, X_train, extended_gaussian_kernel, params)

    for alpha in param_grid['alpha']:
        # Full parameter set (kernel + regularization) kept for reporting
        param_copy = {**params, 'alpha': alpha}
        krr_model = KernelRidge(kernel='precomputed', alpha=alpha)

        # The scorer returns negated MSE per fold; convert to RMSE and average
        mse_scores = cross_val_score(krr_model, similarity_matrix, y_train, scoring='neg_mean_squared_error', cv=kf)
        rmse_scores = np.sqrt(-mse_scores)
        avg_rmse = rmse_scores.mean()

        # Strict '<' keeps the first combination that reaches the minimum
        if avg_rmse < best_mean_score:
            best_mean_score = avg_rmse
            best_param = param_copy

print(best_param)
print(best_mean_score)

{'gamma': 1.9306977288832496e-08, 'epsilon': 0.0007000000000000001, 'beta': 1e-05, 'alpha': 1e-15}
0.03745582905523383


In [58]:
# Define the hyperparameter grid (refined around the previous best point)
param_grid = {
    'gamma': np.logspace(np.log10(1e-8), np.log10(3e-8), num=8),
    'epsilon': np.logspace(np.log10(5e-4), np.log10(1e-3), num=8),
    'beta': np.logspace(np.log10(1e-6), np.log10(1e-5), num=8),
    'alpha': np.logspace(np.log10(1e-15), np.log10(1e-13), num=9)
}

#########################################################################################################

# 'alpha' is the ridge regularization strength, not a kernel hyperparameter, so
# the kernel matrix depends only on the remaining keys. Build it once per kernel
# setting and reuse it for every alpha instead of recomputing it each time.
kernel_grid = {name: values for name, values in param_grid.items() if name != 'alpha'}

# With an integer random_state every split() call yields the same folds, so a
# single KFold object can be shared by all evaluations.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

best_mean_score = np.inf
best_param = None

# Iterate through every kernel setting; alpha varies fastest, which matches the
# visiting order of the full Cartesian product when 'alpha' is the last grid key
# (so ties are resolved the same way).
for params in generate_parameter_combinations(kernel_grid):
    similarity_matrix = create_similarity_matrix(X_train, X_train, extended_gaussian_kernel, params)

    for alpha in param_grid['alpha']:
        # Full parameter set (kernel + regularization) kept for reporting
        param_copy = {**params, 'alpha': alpha}
        krr_model = KernelRidge(kernel='precomputed', alpha=alpha)

        # The scorer returns negated MSE per fold; convert to RMSE and average
        mse_scores = cross_val_score(krr_model, similarity_matrix, y_train, scoring='neg_mean_squared_error', cv=kf)
        rmse_scores = np.sqrt(-mse_scores)
        avg_rmse = rmse_scores.mean()

        # Strict '<' keeps the first combination that reaches the minimum
        if avg_rmse < best_mean_score:
            best_mean_score = avg_rmse
            best_param = param_copy

print(best_param)
print(best_mean_score)

{'gamma': 1e-08, 'epsilon': 0.0005520447568369063, 'beta': 5.179474679231212e-06, 'alpha': 1e-15}
0.03732612247579326


In [59]:
# Define the hyperparameter grid (refined around the previous best point)
param_grid = {
    'gamma': np.logspace(np.log10(5e-9), np.log10(3e-8), num=8),
    'epsilon': np.linspace(3e-4, 7e-4, num=8),
    'beta': np.linspace(3e-6, 7e-6, num=8),
    'alpha': np.logspace(np.log10(1e-15), np.log10(1e-13), num=9)
}

#########################################################################################################

# 'alpha' is the ridge regularization strength, not a kernel hyperparameter, so
# the kernel matrix depends only on the remaining keys. Build it once per kernel
# setting and reuse it for every alpha instead of recomputing it each time.
kernel_grid = {name: values for name, values in param_grid.items() if name != 'alpha'}

# With an integer random_state every split() call yields the same folds, so a
# single KFold object can be shared by all evaluations.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

best_mean_score = np.inf
best_param = None

# Iterate through every kernel setting; alpha varies fastest, which matches the
# visiting order of the full Cartesian product when 'alpha' is the last grid key
# (so ties are resolved the same way).
for params in generate_parameter_combinations(kernel_grid):
    similarity_matrix = create_similarity_matrix(X_train, X_train, extended_gaussian_kernel, params)

    for alpha in param_grid['alpha']:
        # Full parameter set (kernel + regularization) kept for reporting
        param_copy = {**params, 'alpha': alpha}
        krr_model = KernelRidge(kernel='precomputed', alpha=alpha)

        # The scorer returns negated MSE per fold; convert to RMSE and average
        mse_scores = cross_val_score(krr_model, similarity_matrix, y_train, scoring='neg_mean_squared_error', cv=kf)
        rmse_scores = np.sqrt(-mse_scores)
        avg_rmse = rmse_scores.mean()

        # Strict '<' keeps the first combination that reaches the minimum
        if avg_rmse < best_mean_score:
            best_mean_score = avg_rmse
            best_param = param_copy

print(best_param)
print(best_mean_score)

{'gamma': 4.999999999999999e-09, 'epsilon': 0.0005285714285714286, 'beta': 3e-06, 'alpha': 1e-15}
0.03724886643349343


In [61]:
# Define the hyperparameter grid (alpha fixed; finer search over the kernel parameters)
param_grid = {
    'gamma': np.logspace(np.log10(5e-10), np.log10(5e-9), num=20),
    'epsilon': np.linspace(4e-4, 6e-4, num=20),
    'beta': np.logspace(np.log10(5e-7), np.log10(5e-6), num=20),
    'alpha': [1e-15]
}

#########################################################################################################

# 'alpha' is the ridge regularization strength, not a kernel hyperparameter, so
# the kernel matrix depends only on the remaining keys. Build it once per kernel
# setting and reuse it for every alpha instead of recomputing it each time.
kernel_grid = {name: values for name, values in param_grid.items() if name != 'alpha'}

# With an integer random_state every split() call yields the same folds, so a
# single KFold object can be shared by all evaluations.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

best_mean_score = np.inf
best_param = None

# Iterate through every kernel setting; alpha varies fastest, which matches the
# visiting order of the full Cartesian product when 'alpha' is the last grid key
# (so ties are resolved the same way).
for params in generate_parameter_combinations(kernel_grid):
    similarity_matrix = create_similarity_matrix(X_train, X_train, extended_gaussian_kernel, params)

    for alpha in param_grid['alpha']:
        # Full parameter set (kernel + regularization) kept for reporting
        param_copy = {**params, 'alpha': alpha}
        krr_model = KernelRidge(kernel='precomputed', alpha=alpha)

        # The scorer returns negated MSE per fold; convert to RMSE and average
        mse_scores = cross_val_score(krr_model, similarity_matrix, y_train, scoring='neg_mean_squared_error', cv=kf)
        rmse_scores = np.sqrt(-mse_scores)
        avg_rmse = rmse_scores.mean()

        # Strict '<' keeps the first combination that reaches the minimum
        if avg_rmse < best_mean_score:
            best_mean_score = avg_rmse
            best_param = param_copy

print(best_param)
print(best_mean_score)

{'gamma': 9.164903554162186e-10, 'epsilon': 0.0004210526315789474, 'beta': 5e-07, 'alpha': 1e-15}
0.037144847328403036


In [62]:
# Define the hyperparameter grid (alpha fixed; finer search over the kernel parameters)
param_grid = {
    'gamma': np.logspace(np.log10(7e-10), np.log10(3e-9), num=20),
    'epsilon': np.linspace(4e-4, 6e-4, num=10),
    'beta': np.logspace(np.log10(5e-8), np.log10(5e-7), num=20),
    'alpha': [1e-15]
}

#########################################################################################################

# 'alpha' is the ridge regularization strength, not a kernel hyperparameter, so
# the kernel matrix depends only on the remaining keys. Build it once per kernel
# setting and reuse it for every alpha instead of recomputing it each time.
kernel_grid = {name: values for name, values in param_grid.items() if name != 'alpha'}

# With an integer random_state every split() call yields the same folds, so a
# single KFold object can be shared by all evaluations.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

best_mean_score = np.inf
best_param = None

# Iterate through every kernel setting; alpha varies fastest, which matches the
# visiting order of the full Cartesian product when 'alpha' is the last grid key
# (so ties are resolved the same way).
for params in generate_parameter_combinations(kernel_grid):
    similarity_matrix = create_similarity_matrix(X_train, X_train, extended_gaussian_kernel, params)

    for alpha in param_grid['alpha']:
        # Full parameter set (kernel + regularization) kept for reporting
        param_copy = {**params, 'alpha': alpha}
        krr_model = KernelRidge(kernel='precomputed', alpha=alpha)

        # The scorer returns negated MSE per fold; convert to RMSE and average
        mse_scores = cross_val_score(krr_model, similarity_matrix, y_train, scoring='neg_mean_squared_error', cv=kf)
        rmse_scores = np.sqrt(-mse_scores)
        avg_rmse = rmse_scores.mean()

        # Strict '<' keeps the first combination that reaches the minimum
        if avg_rmse < best_mean_score:
            best_mean_score = avg_rmse
            best_param = param_copy

print(best_param)
print(best_mean_score)

{'gamma': 7.000000000000002e-10, 'epsilon': 0.0004222222222222222, 'beta': 3.923799851757303e-07, 'alpha': 1e-15}
0.03713331563664969


In [63]:
# Define the hyperparameter grid (alpha fixed; linear search over the kernel parameters)
param_grid = {
    'gamma': np.linspace(1e-10, 1e-9, num=10),
    'epsilon': np.linspace(4e-4, 6e-4, num=10),
    'beta': np.linspace(1e-7, 5e-7, num=10),
    'alpha': [1e-15]
}

#########################################################################################################

# 'alpha' is the ridge regularization strength, not a kernel hyperparameter, so
# the kernel matrix depends only on the remaining keys. Build it once per kernel
# setting and reuse it for every alpha instead of recomputing it each time.
kernel_grid = {name: values for name, values in param_grid.items() if name != 'alpha'}

# With an integer random_state every split() call yields the same folds, so a
# single KFold object can be shared by all evaluations.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

best_mean_score = np.inf
best_param = None

# Iterate through every kernel setting; alpha varies fastest, which matches the
# visiting order of the full Cartesian product when 'alpha' is the last grid key
# (so ties are resolved the same way).
for params in generate_parameter_combinations(kernel_grid):
    similarity_matrix = create_similarity_matrix(X_train, X_train, extended_gaussian_kernel, params)

    for alpha in param_grid['alpha']:
        # Full parameter set (kernel + regularization) kept for reporting
        param_copy = {**params, 'alpha': alpha}
        krr_model = KernelRidge(kernel='precomputed', alpha=alpha)

        # The scorer returns negated MSE per fold; convert to RMSE and average
        mse_scores = cross_val_score(krr_model, similarity_matrix, y_train, scoring='neg_mean_squared_error', cv=kf)
        rmse_scores = np.sqrt(-mse_scores)
        avg_rmse = rmse_scores.mean()

        # Strict '<' keeps the first combination that reaches the minimum
        if avg_rmse < best_mean_score:
            best_mean_score = avg_rmse
            best_param = param_copy

print(best_param)
print(best_mean_score)

{'gamma': 4.0000000000000007e-10, 'epsilon': 0.0004, 'beta': 2.333333333333333e-07, 'alpha': 1e-15}
0.03712437931741687


## Evaluation ##