# Gaussian Process Regressor

In [12]:
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.datasets import load_diabetes
from sklearn import metrics
import numpy as np
import pickle

In [3]:
diabeties = load_diabetes()

In [7]:
# Generate synthetic data
X = diabeties.data
Y = diabeties.target

# Generate synthetic data
#X, y = make_regression(n_samples=1000, n_features=10, noise=0.1, random_state=42)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Instantiate the GaussianProcessRegressor model with RBF kernel
kernel = RBF(length_scale=1.0)
gpr = GaussianProcessRegressor(kernel=kernel, random_state=42)

# Fit the model to the training data
gpr.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = gpr.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

Mean Squared Error: 26548.584269662922


Hyperparameter tuning for GaussianProcessRegressor involves finding the optimal values for parameters like the kernel parameters or other model-specific parameters. One common approach for hyperparameter tuning is to use cross-validation.

Here's how you can perform hyperparameter tuning for GaussianProcessRegressor using cross-validation:

In [14]:
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error

# Generate synthetic data
#X, y = make_regression(n_samples=1000, n_features=10, noise=0.1, random_state=42)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Define the parameter grid
param_grid = {
    'kernel': [RBF(length_scale=1.0), RBF(length_scale=0.1), RBF(length_scale=0.01), RBF(length_scale=0.001)],
    'alpha': [1e-5, 1e-4, 1e-3, 1e-2, 1e-1]
}

# Instantiate the GaussianProcessRegressor model
gpr = GaussianProcessRegressor(random_state=42)

# Perform grid search with cross-validation
grid_search = GridSearchCV(gpr, param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)

# Get the best parameters
best_params = grid_search.best_params_
print("Best Parameters:", best_params)

# Evaluate the model with best parameters
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
# mse = mean_squared_error(y_test, y_pred)
# print("Mean Squared Error:", mse)

err = metrics.mean_squared_error(y_test, best_model.predict(X_test))
r2 = metrics.r2_score(y_test, best_model.predict(X_test))
rmse = np.sqrt(err)

print("---Gaussian Process Regressor on diabeties dataset---")
rmse = np.sqrt(err)
print("R squared: %.2f"%r2, ' MSE: %.2f \n' %err, ' RMSE: %.2f \n' %rmse) 

Best Parameters: {'alpha': 0.1, 'kernel': RBF(length_scale=0.1)}
---Gaussian Process Regressor on diabeties dataset---
R squared: 0.06  MSE: 4959.38 
  RMSE: 70.42 

