In [48]:
import pandas as pd
from prettytable import PrettyTable
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, Matern, RationalQuadratic, ConstantKernel as C
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.metrics import r2_score, mean_squared_error

In [None]:
# Load the steel dataset and convert the whole frame to a NumPy array.
steel_data = pd.read_csv("steel.csv")
steel_matrix = steel_data.to_numpy()

# Ground truths are the final column; every other column is a feature.
ground_truths = steel_matrix[:, -1]
features = steel_matrix[:, :-1]

In [None]:
# Column headings and row buffer for the per-fold results table.
scores_headers = ["Fold", "R2 Score", "Mean Squared Error"]
scores_list = []

# Ten folds taken in file order (no shuffling).
kf = KFold(n_splits=10, shuffle=False)

# Baseline regressor left on its default kernel; seeded for repeatability.
model = GaussianProcessRegressor(random_state=4)

# Hyperparameter search space: each alpha is tried with each kernel.
candidate_alphas = [1e-10, 0.01, 0.1, 1, 10]
candidate_kernels = [
    RBF(length_scale=1.0),
    Matern(length_scale=1.0, nu=1.5),
    RationalQuadratic(length_scale=1.0, alpha=0.5),
    C(constant_value=1.0),
]
param_grid = {
    "alpha": candidate_alphas,
    "kernel": candidate_kernels,
}

In [45]:
# Evaluate the default-parameter model with K-fold cross-validation and
# collect one [fold, R2, MSE] row per fold for the results table.
#
# The buffer is reset here so that re-running this cell replaces the rows
# instead of appending a second copy of every fold to the existing list.
scores_list = []

for fold_number, (train_index, test_index) in enumerate(kf.split(features), start=1):
    print(f"Fold {fold_number}")

    # Features and ground truths for this fold
    training_features, test_features = features[train_index], features[test_index]
    training_ground_truths, test_ground_truths = ground_truths[train_index], ground_truths[test_index]

    # Refit on this fold's training split, then score on the held-out split
    model.fit(training_features, training_ground_truths)
    prediction = model.predict(test_features)

    r2 = r2_score(test_ground_truths, prediction)
    mse = mean_squared_error(test_ground_truths, prediction)

    # Stored as pre-formatted strings (2 decimal places) for display
    scores_list.append([f"{fold_number}", f"{r2:.2f}", f"{mse:.2f}"])

Fold 1
Fold 2
Fold 3
Fold 4
Fold 5
Fold 6
Fold 7
Fold 8
Fold 9
Fold 10


In [46]:
# Render the per-fold scores as a titled ASCII table.
table = PrettyTable(field_names=scores_headers)
table.title = "Error Scores with Default Params"
table.add_rows(scores_list)
print(table)

+--------------------------------------+
|   Error Scores with Default Params   |
+------+----------+--------------------+
| Fold | R2 Score | Mean Squared Error |
+------+----------+--------------------+
|  1   |  -3.81   |      20167.44      |
|  2   |  -4.52   |      26129.97      |
|  3   |  -3.62   |      31384.06      |
|  4   |  -3.67   |      28257.58      |
|  5   |  -3.69   |      23575.17      |
|  6   |  -5.69   |      52048.96      |
|  7   |  -6.79   |      32426.52      |
|  8   |  -6.14   |      42822.30      |
|  9   |  -5.60   |      74852.90      |
|  10  |  -9.00   |      84511.17      |
+------+----------+--------------------+


In [47]:
hyperparam_score_headers = ["Alpha", "Kernel", "R2 Score", "Mean Squared Error"]

# Search every alpha/kernel combination in param_grid on the same folds.
# Both metrics are recorded; the final refit uses the best mean R2.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=kf,
    scoring=["r2", "neg_mean_squared_error"],
    refit="r2",
    n_jobs=-1,
)
grid_search.fit(features, ground_truths)

results = grid_search.cv_results_

# One display row per candidate. The MSE scorer is negated
# ("neg_mean_squared_error"), so the sign is flipped back for display.
hyperparam_scores_list = [
    [
        f"{params['alpha']}",
        f"{params['kernel']}",
        f"{mean_r2:.2f}",
        f"{-mean_mse:.2f}",
    ]
    for mean_r2, mean_mse, params in zip(
        results['mean_test_r2'],
        results['mean_test_neg_mean_squared_error'],
        results['params'],
    )
]

hyperparam_table = PrettyTable(field_names=hyperparam_score_headers)
hyperparam_table.title = "Hyperparameters & Values"
hyperparam_table.add_rows(hyperparam_scores_list)
print(hyperparam_table)

print(grid_search.best_params_)

+--------------------------------------------------------------------------------------+
|                               Hyperparameters & Values                               |
+-------+----------------------------------------------+----------+--------------------+
| Alpha |                    Kernel                    | R2 Score | Mean Squared Error |
+-------+----------------------------------------------+----------+--------------------+
| 1e-10 |             RBF(length_scale=1)              |  -5.90   |      45256.42      |
| 1e-10 |        Matern(length_scale=1, nu=1.5)        |  -5.90   |      45237.80      |
| 1e-10 | RationalQuadratic(alpha=0.5, length_scale=1) |   0.22   |      5316.27       |
| 1e-10 |                     1**2                     |  -56.07  |     277077.57      |
|  0.01 |             RBF(length_scale=1)              |  -5.40   |      42494.65      |
|  0.01 |        Matern(length_scale=1, nu=1.5)        |  -5.15   |      41001.47      |
|  0.01 | RationalQua