### Support Vector Machines
1. GaussianProcessClassifier : Gaussian process classification (GPC) based on Laplace approximation.
2. GaussianProcessRegressor  :  Gaussian process regression (GPR).


In [None]:
import sklearn.gaussian_process

# Gaussian process classifier with every constructor argument written out at
# its default value.
gpc = sklearn.gaussian_process.GaussianProcessClassifier(
    # Covariance function. None selects the library default, 1.0 * RBF(1.0).
    kernel=None,
    # Optimizer for the kernel hyperparameters; None keeps them fixed.
    optimizer="fmin_l_bfgs_b",
    # Extra optimizer runs started from randomly sampled hyperparameters.
    # 0 means a single run from the kernel's initial values.
    n_restarts_optimizer=0,
    # Maximum Newton iterations used to approximate the posterior in predict.
    max_iter_predict=100,
    # If True, reuse the previous Laplace-approximation solution as the
    # starting point of the next fit.
    warm_start=False,
    # If True, keep a copy of the training data; otherwise only a reference.
    copy_X_train=True,
    # Multi-class strategy: "one_vs_rest" or "one_vs_one".
    multi_class="one_vs_rest",
    # Parallel jobs for multi-class problems. None means 1 job (unless a
    # joblib backend says otherwise); -1 means all processors.
    n_jobs=None,
    # Seed for the random restarts. Use an int for reproducible results.
    random_state=None,
)

# Candidate values for a hyperparameter search over the classifier.
gpc_hyperparameters = {
    "kernel": [
        None,
        sklearn.gaussian_process.kernels.RBF(),
        sklearn.gaussian_process.kernels.Matern(),
    ],
    # Default optimizer, or None to skip hyperparameter optimization.
    "optimizer": ["fmin_l_bfgs_b", None],
    # Number of optimizer restarts.
    "n_restarts_optimizer": [0, 5, 10],
    # Iteration cap for the posterior approximation at predict time.
    "max_iter_predict": [50, 100, 200],
    # Warm-started or fresh fit.
    "warm_start": [True, False],
    # Store a copy of the training data, or only a reference to it.
    "copy_X_train": [True, False],
    # Multi-class strategy.
    "multi_class": ["one_vs_rest", "one_vs_one"],
    # All processors (-1) or a single job (None).
    "n_jobs": [-1, None],
    # Seeds; None leaves the restarts unseeded.
    "random_state": [None, 42, 0],
}


In [None]:
import sklearn.gaussian_process # type: ignore

# Gaussian process regressor with every constructor argument written out at
# its default value.
gpr = sklearn.gaussian_process.GaussianProcessRegressor(
    # Covariance function of the process. None selects the library default:
    # ConstantKernel(1.0, "fixed") * RBF(1.0, "fixed"). A custom kernel encodes
    # assumptions about the target function (smoothness, periodicity, ...).
    kernel=None,
    # Value added to the diagonal of the kernel matrix during fitting. It can
    # be read as the variance of extra Gaussian observation noise, and it also
    # keeps the matrix numerically positive definite. Larger values assume
    # noisier targets and give smoother fits.
    alpha=1e-10,
    # Optimizer that maximizes the log-marginal likelihood over the kernel
    # hyperparameters. "fmin_l_bfgs_b" is the L-BFGS-B quasi-Newton method.
    # None keeps the kernel hyperparameters fixed.
    optimizer="fmin_l_bfgs_b",
    # Extra optimizer runs from randomly sampled starting points, to reduce
    # the risk of ending in a poor local optimum.
    n_restarts_optimizer=0,
    # If True, targets are scaled to zero mean and unit variance before fitting.
    normalize_y=False,
    # If True, keep a copy of the training data; otherwise only a reference.
    copy_X_train=True,
    # Seed for the random optimizer restarts. Use an int for reproducibility.
    random_state=None,
)

# Candidate values for a hyperparameter search over the regressor.
gpr_hyperparameters = {
    "kernel": [
        None,
        # Radial basis function: very smooth functions.
        sklearn.gaussian_process.kernels.RBF(),
        # Matern: smoothness controlled by nu.
        sklearn.gaussian_process.kernels.Matern(),
        # White noise: independent noise on each observation.
        sklearn.gaussian_process.kernels.WhiteKernel(),
    ],
    # Diagonal noise / regularization levels.
    "alpha": [1e-10, 1e-5, 0.1, 1.0],
    # Default optimizer, or None to skip hyperparameter optimization.
    "optimizer": ["fmin_l_bfgs_b", None],
    # Number of optimizer restarts.
    "n_restarts_optimizer": [0, 5, 10],
    # Normalized or raw targets.
    "normalize_y": [True, False],
    # Store a copy of the training data, or only a reference to it.
    "copy_X_train": [True, False],
    # Seeds; None leaves the restarts unseeded.
    "random_state": [None, 42, 0],
}


### Kernels and Kernel Suitability

<table style="width: 35%; display: inline-block; vertical-align: top;">
  <tr>
    <th>Kernel</th>
    <th>Hyperparameter</th>
  </tr>
  <tr>
    <td>CompoundKernel</td>
    <td>RBF</td>
  </tr>
  <tr>
    <td>Sum</td>
    <td>Matern</td>
  </tr>
  <tr>
    <td>Product</td>
    <td>White</td>
  </tr>
  <tr>
    <td>DotProduct</td>
    <td>Pairwise</td>
  </tr>
  <tr>
    <td>Constant</td>
    <td>RationalQuadratic</td>
  </tr>
  <tr>
    <td>Exponential</td>
    <td>ExpSineSquared</td>
  </tr>
  <tr>
    <td>Hyperparameter</td>
    <td>Exponential</td>
  </tr>
</table>

<table style="width: 45%; display: inline-block; vertical-align: top;">
  <tr>
    <th>Suitability</th>
    <th>Kernels</th>
  </tr>
  <tr>
    <td>Classification</td>
    <td>RBF, Matern, Dot Product, Exponentiation, Pairwise</td>
  </tr>
  <tr>
    <td>Regression</td>
    <td>RBF, Matern, White, RationalQuadratic</td>
  </tr>
  <tr>
    <td>Both</td>
    <td>Compound, Sum, Product</td>
  </tr>
</table>


### Kernel

In [None]:
import sklearn.gaussian_process

# Kernel is the abstract base class of every kernel in scikit-learn.
# It defines the shared interface (__call__, diag, is_stationary) and common
# functionality such as get_params/set_params, cloning and the +, * and **
# operators.
#
# Because it is abstract, Kernel() cannot be instantiated directly (it raises
# TypeError). Use a concrete subclass, or derive your own kernel from it.
kernel = sklearn.gaussian_process.kernels.RBF()  # a concrete Kernel subclass



from sklearn.gaussian_process.kernels import Kernel # type: ignore
import numpy as np

class CustomKernel(Kernel):
    """Minimal custom kernel: k(x, y) = exp(-param1 * ||x - y||^2).

    All three abstract methods of ``Kernel`` are implemented (``__call__``,
    ``diag`` and ``is_stationary``); without them the class cannot be
    instantiated.

    Parameters
    ----------
    param1 : float, default=1.0
        Multiplier of the squared Euclidean distance in the exponent.
        No ``hyperparameter_param1`` property is declared, so the value is
        fixed and is not tuned during fitting.
    """

    def __init__(self, param1=1.0):
        self.param1 = param1

    def __call__(self, X, Y=None, eval_gradient=False):
        """Return the kernel matrix k(X, Y), optionally with its gradient.

        Parameters
        ----------
        X : array-like of shape (n_samples_X, n_features)
        Y : array-like of shape (n_samples_Y, n_features), default=None
            If None, k(X, X) is evaluated.
        eval_gradient : bool, default=False
            If True, also return the gradient with respect to the log of the
            kernel hyperparameters. Only supported when Y is None.

        Returns
        -------
        K : ndarray of shape (n_samples_X, n_samples_Y)
        K_gradient : ndarray of shape (n_samples_X, n_samples_X, 0)
            Returned only when ``eval_gradient`` is True. It is empty because
            this kernel has no tunable hyperparameters.
        """
        X = np.atleast_2d(X)
        if Y is None:
            Y = X
        else:
            if eval_gradient:
                raise ValueError("Gradient can only be evaluated when Y is None.")
            Y = np.atleast_2d(Y)

        # Pairwise squared Euclidean distances via broadcasting.
        differences = X[:, np.newaxis, :] - Y[np.newaxis, :, :]
        squared_distances = np.sum(differences ** 2, axis=-1)
        K = np.exp(-self.param1 * squared_distances)

        if eval_gradient:
            return K, np.empty((X.shape[0], X.shape[0], 0))
        return K

    def diag(self, X):
        """Return the diagonal of k(X, X); k(x, x) = exp(0) = 1 for every x."""
        return np.ones(X.shape[0])

    def is_stationary(self):
        """Return True: the kernel depends only on the difference x - y."""
        return True


# Example usage of the custom kernel
custom_kernel = CustomKernel(param1=2.0)


### Hyperparameter 

In [None]:
"""
Kernel hyperparameters in scikit-learn.

Kernels such as `ConstantKernel` have hyperparameters (constant value, length
scale, ...) that can be inspected in two ways:

- `kernel.hyperparameters` lists `Hyperparameter` descriptors (name, value
  type, bounds, number of elements, fixed flag);
- `kernel.get_params()` returns the constructor arguments as a dict.

`get_params()` only reads values; use `kernel.set_params(...)` to change them.
"""
from sklearn.gaussian_process.kernels import ConstantKernel
from sklearn.datasets import make_friedman2
from sklearn.gaussian_process import GaussianProcessRegressor

# Create a synthetic dataset for testing
X, y = make_friedman2(n_samples=50, noise=0, random_state=0)

# Define a kernel with a constant value and its bounds for optimization
kernel = ConstantKernel(constant_value=1.0, constant_value_bounds=(0.0, 10.0))

#------------------------------------------------------------------------------------------------------------------------------
# Hyperparameter descriptors of the kernel
for hyperparameter in kernel.hyperparameters:
    print(hyperparameter)

# Output: Hyperparameter(name='constant_value', value_type='numeric', bounds=array([[ 0., 10.]]), n_elements=1, fixed=False)
#------------------------------------------------------------------------------------------------------------------------------
# Constructor parameters of the kernel, printed in alphabetical order
params = kernel.get_params()
for key in sorted(params):
    print(f"{key} : {params[key]}")

# Output: constant_value : 1.0
# constant_value_bounds : (0.0, 10.0)
#------------------------------------------------------------------------------------------------------------------------------
#------------------------------------------------------------------------------------------------------------------------------


### Compound kernel

In [None]:
import sklearn.gaussian_process
# CompoundKernel is a kernel composed of a set of other kernels.
# It does not add or multiply them (use Sum / Product for that). Each kernel
# is evaluated separately and the resulting matrices are stacked along an
# extra last axis.
#
# scikit-learn uses it internally for multi-class GaussianProcessClassifier,
# where every binary sub-problem has its own kernel.

compoundkernel_kernel = sklearn.gaussian_process.kernels.CompoundKernel(
    # The only constructor argument: the list of kernels to hold.
    kernels=[
        sklearn.gaussian_process.kernels.RBF(length_scale=1.0),
        sklearn.gaussian_process.kernels.Matern(length_scale=1.0),
    ]
)


# Candidate kernel lists; "kernels" is the only parameter CompoundKernel accepts.
CompoundKernel_hyperparameters = {
    "kernels": [
        [
            sklearn.gaussian_process.kernels.RBF(length_scale=1.0),
            sklearn.gaussian_process.kernels.Matern(length_scale=1.0),
        ],
        [
            sklearn.gaussian_process.kernels.ExpSineSquared(periodicity=1.0),
            sklearn.gaussian_process.kernels.RationalQuadratic(),
        ],
    ],
}


### Sum kernel

In [None]:
import sklearn.gaussian_process

"""
The Sum kernel combines two kernels by adding their outputs:

    k_sum(X, Y) = k1(X, Y) + k2(X, Y)

It is useful when the data is believed to come from several underlying
processes, each captured by a different kernel, for example a smooth trend
plus a rougher component, or two different periodic behaviors.
"k1 + k2" on kernel objects builds the same Sum kernel.
"""
sum_kernel = sklearn.gaussian_process.kernels.Sum(
    # First base kernel of the sum: models smooth variations.
    k1=sklearn.gaussian_process.kernels.RBF(length_scale=1.0),
    # Second base kernel of the sum: models rougher variations.
    k2=sklearn.gaussian_process.kernels.Matern(length_scale=1.0, nu=1.5),
)


# Candidate base kernels for the two terms of the sum.
Sum_hyperparameters = {
    # First kernel in the sum.
    "k1": [
        sklearn.gaussian_process.kernels.RBF(length_scale=1.0),
        sklearn.gaussian_process.kernels.Matern(length_scale=1.0, nu=1.5),
    ],
    # Second kernel in the sum.
    "k2": [
        sklearn.gaussian_process.kernels.RBF(length_scale=1.0),
        sklearn.gaussian_process.kernels.ExpSineSquared(periodicity=1.0),
    ],
}


### Product kernel

In [None]:
import sklearn.gaussian_process

"""
The Product kernel models interactions between different features of the data by multiplying the results of two other kernels. 
It is useful when the data is believed to have interactions between different processes or features, where each feature or process is described by its own kernel.
"""
product_kernel = sklearn.gaussian_process.kernels.Product(
    # First factor; any kernel (RBF, Matern, ...) is allowed.
    k1=sklearn.gaussian_process.kernels.RBF(length_scale=1.0),
    # Second factor; its output is multiplied element-wise with k1's.
    k2=sklearn.gaussian_process.kernels.Matern(length_scale=1.0, nu=1.5),
)

# Candidate base kernels for the two factors of the product.
Product_hyperparameters = {
    # First kernel in the product.
    "k1": [
        sklearn.gaussian_process.kernels.RBF(length_scale=1.0),
        sklearn.gaussian_process.kernels.Matern(length_scale=1.0, nu=1.5),
    ],
    # Second kernel in the product.
    "k2": [
        sklearn.gaussian_process.kernels.RBF(length_scale=1.0),
        sklearn.gaussian_process.kernels.ExpSineSquared(periodicity=1.0),
    ],
}


### Dot product kernel

In [None]:
import sklearn.gaussian_process

"""
The DotProduct kernel represents the inner product of two vectors in a high-dimensional space. 
This kernel is typically used for modeling data where the relationship between points is based on their similarity in the feature space, without any scaling or transformation.
It is useful for linear regression problems or any scenario where you want to model similarity through dot products in the feature space.
"""
# Kernel formula: k(x_i, x_j) = sigma_0^2 + x_i . x_j
dot_product_kernel = sklearn.gaussian_process.kernels.DotProduct(
    # Inhomogeneity offset, added as sigma_0^2 to every dot product.
    # sigma_0 = 0 gives the homogeneous linear kernel. Default: 1.0.
    sigma_0=1.0,
    # (lower, upper) search range for sigma_0 during hyperparameter
    # optimization. Default: (1e-5, 1e5).
    sigma_0_bounds=(1e-5, 1e5),
)

# Candidate values for a search over the DotProduct kernel.
DotProduct_hyperparameters = {
    # Offset values: small keeps the kernel nearly homogeneous, large adds a
    # big constant term.
    "sigma_0": [1.0, 0.1, 10.0],
    # Ranges sigma_0 may take during optimization.
    "sigma_0_bounds": [(1e-5, 1e2), (1e-2, 1e3)],
}


### Constant kernel

In [None]:
import sklearn.gaussian_process

"""
The ConstantKernel is used to model a constant signal with a fixed magnitude across all data points. 
It is useful for cases where the data is expected to have a constant variance or baseline value, 
independent of the input features. This kernel is typically used to model the overall scale of the function.
"""
constant_kernel = sklearn.gaussian_process.kernels.ConstantKernel(
    # Value returned for every pair of inputs: k(x_1, x_2) = constant_value.
    constant_value=1,
    # (lower, upper) search range for constant_value during optimization.
    constant_value_bounds=(1e-5, 1e5),
)



# Example: ConstantKernel inside a Gaussian process regressor.
from sklearn.datasets import make_friedman2
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel

X, y = make_friedman2(n_samples=500, noise=0, random_state=0)

# RBF for the shape of the function plus a constant offset of magnitude 2.
kernel = RBF() + ConstantKernel(constant_value=2)

# fit() returns the estimator itself, so fitting on its own line is equivalent.
gpr = GaussianProcessRegressor(kernel=kernel, alpha=5, random_state=0)
gpr.fit(X, y)

print(gpr.score(X, y))  # R^2 on the training data
gpr.predict(X[:1], return_std=True)  # mean and standard deviation for the first sample


### ExpSineSquared kernel.

In [None]:
import sklearn.gaussian_process

"""
The Exp-Sine-Squared (periodic) kernel models functions that repeat themselves exactly.

The formula for the kernel is:
    k(x, x') = exp( - (2 * sin^2( (π * d(x, x')) / periodicity ) ) / length_scale^2 )

Where:
- k(x, x') is the similarity between two input points x and x'.
- d(x, x') is the Euclidean distance between x and x'.
- sin^2( (π * d(x, x')) / periodicity ) makes the kernel periodic: it is 0 whenever the distance
  is a multiple of the periodicity, so such points are treated as maximally similar (k = 1).
- length_scale controls how quickly similarity drops within one period. A smaller value allows
  more rapid variation inside each period, a larger value gives smoother oscillations.
- Because of the sine term, similarity does NOT decay monotonically with distance; it rises and
  falls with period `periodicity`.

Best suited for datasets with strong periodic or seasonal patterns, such as time-series with cycles,
biological rhythms, or oscillatory physical processes.
"""


expsinesquared_kernel = sklearn.gaussian_process.kernels.ExpSineSquared(
    # Smoothness within one period. Default: 1.0.
    # Smaller values allow sharper variation; larger values give smoother oscillations.
    length_scale=1,
    # Distance after which the pattern repeats. Default: 1.0.
    # Smaller values mean more frequent repetition; larger values mean longer cycles.
    periodicity=1,
    # (lower, upper) search range for length_scale during optimization. Default: (1e-5, 1e5).
    length_scale_bounds=(1e-5, 1e5),
    # (lower, upper) search range for periodicity during optimization. Default: (1e-5, 1e5).
    periodicity_bounds=(1e-5, 1e5),
)

# Candidate values for a search over the ExpSineSquared kernel.
ExpSineSquared_hyperparameters = {
    # Smoothness within a period: small for sharper changes, large for smoother patterns.
    "length_scale": [0.1, 1.0, 10.0],
    # Period of the oscillation: small for short cycles, large for long cycles.
    "periodicity": [0.5, 1.0, 5.0],
    # Ranges length_scale may take during optimization.
    "length_scale_bounds": [(1e-5, 1e2), (1e-2, 1e3)],
    # Ranges periodicity may take during optimization.
    "periodicity_bounds": [(1e-5, 1e2), (1e-2, 1e3)],
}


### Exponention Kernel

In [None]:
import sklearn.gaussian_process

"""
The Exponentiation kernel is used to apply exponentiation to the result of another kernel. 
It is useful when you want to control the smoothness of the kernel by modifying its behavior with an exponent.
This kernel can model more complex relationships by scaling the output of another kernel to a higher or lower power.

The kernel is defined as:

k(X, Y) = (k_0(X, Y))^exponent

Where k_0 is the base kernel and the exponent controls the magnitude of the kernel's output.

This is typically used when the data may require a non-linear transformation of the base kernel, such as scaling the interaction between features.
"""
exponentiation_kernel = sklearn.gaussian_process.kernels.Exponentiation(
    # Base kernel whose output is raised to a power.
    kernel=sklearn.gaussian_process.kernels.RationalQuadratic(),
    # Power applied to the base kernel. Required argument, it has no default.
    exponent=2,
)

# Candidate values for a search over the Exponentiation kernel.
Exponentiation_hyperparameters = {
    # Base kernels the exponent can be applied to.
    "kernel": [
        sklearn.gaussian_process.kernels.RationalQuadratic(),
        sklearn.gaussian_process.kernels.RBF(length_scale=1.0),
    ],
    # Powers applied to the base kernel output.
    "exponent": [0.5, 1.0, 2.0, 3.0],
}

# Example usage:
from sklearn.datasets import make_friedman2
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RationalQuadratic, Exponentiation

X, y = make_friedman2(n_samples=500, noise=0, random_state=0)
kernel = Exponentiation(RationalQuadratic(), exponent=2)

# fit() returns the estimator itself, so fitting on its own line is equivalent.
gpr = GaussianProcessRegressor(kernel=kernel, alpha=5, random_state=0)
gpr.fit(X, y)

print(gpr.score(X, y))  # 0.419...
print(gpr.predict(X[:1], return_std=True))  # Predicted mean and standard deviation.


### RBF  kernel

In [None]:
import sklearn.gaussian_process

"""
The RBF (Radial Basis Function) kernel, also known as the Gaussian kernel, is a popular kernel in Gaussian Processes 
for modeling smooth and continuous functions. It is defined as:

k(x_i, x_j) = exp(-||x_i - x_j||^2 / (2 * length_scale^2))

Where:
    - ||x_i - x_j|| is the Euclidean distance between the points x_i and x_j.
    - length_scale controls the smoothness of the kernel: smaller values make the kernel more sensitive to local changes, 
      resulting in a less smooth function, while larger values lead to smoother functions.

The RBF kernel is widely used for modeling smooth, non-linear relationships in regression and classification tasks.

The `length_scale` parameter defines the scale of the kernel. It determines how much influence each data point has on 
others in the kernel's evaluation. A small length scale means that only nearby points have a significant effect,
 while a large length scale means that points further apart have a more significant influence.

"""
rbf_kernel = sklearn.gaussian_process.kernels.RBF(
    # Distance over which points stay strongly correlated; small values allow
    # sharper variation.
    length_scale=1.0,
    # (lower, upper) search range for length_scale during optimization.
    length_scale_bounds=(1e-5, 1e5),
)

# Candidate values for a search over the RBF kernel.
RBF_hyperparameters = {
    # Smoothness of the modeled function.
    "length_scale": [0.1, 1.0, 10.0],
    # Ranges length_scale may take during optimization.
    "length_scale_bounds": [(1e-5, 1e2), (1e-2, 1e3)],
}

# Example usage:
from sklearn.datasets import load_iris
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF

# Iris: 150 samples, 3 classes.
X, y = load_iris(return_X_y=True)

# Scaled RBF kernel; "1.0 *" adds a constant-kernel factor for the amplitude.
kernel = 1.0 * RBF(length_scale=1.0)

# fit() returns the estimator itself, so fitting on its own line is equivalent.
gpc = GaussianProcessClassifier(kernel=kernel, random_state=0)
gpc.fit(X, y)

# Training accuracy and class probabilities.
print(gpc.score(X, y))  # e.g., 0.9866...
print(gpc.predict_proba(X[:2]))  # Predicted probabilities for the first two samples.


### Matern kernel

In [None]:
import sklearn.gaussian_process

"""
The Matern kernel is a generalization of the Radial Basis Function (RBF) kernel with an extra
smoothness parameter nu. It is used in Gaussian Processes to control how smooth the modeled
function is. The kernel is defined as:

k(x_i, x_j) = 1 / (Gamma(nu) * 2^(nu - 1))
              * (sqrt(2 * nu) / l * d(x_i, x_j))^nu
              * K_nu(sqrt(2 * nu) / l * d(x_i, x_j))

Where:
    - d(x_i, x_j) is the Euclidean distance between x_i and x_j,
    - l is the length scale,
    - Gamma is the gamma function and K_nu is the modified Bessel function of the second kind.

Special cases:
    - nu = 0.5 : absolute exponential kernel (rough, non-differentiable functions),
    - nu = 1.5 : once-differentiable functions,
    - nu = 2.5 : twice-differentiable functions,
    - nu = inf : equivalent to the RBF kernel.
"""
matern_kernel = sklearn.gaussian_process.kernels.Matern(
    # Distance over which points stay strongly correlated; small values allow sharper variation.
    length_scale=1.0,
    # (lower, upper) search range for length_scale during optimization.
    length_scale_bounds=(1e-5, 1e5),
    # Smoothness; default 1.5. nu stays fixed during fitting. Values other than
    # 0.5, 1.5, 2.5 and inf are allowed but considerably more expensive to evaluate.
    nu=1.5,
)

# Candidate values for a search over the Matern kernel.
Matern_hyperparameters = {
    # Length scale: smaller values for sharper variation.
    "length_scale": [0.1, 1.0, 10.0],
    # Ranges length_scale may take during optimization.
    "length_scale_bounds": [(1e-5, 1e2), (1e-2, 1e3)],
    # Smoothness levels with efficient closed forms.
    "nu": [0.5, 1.5, 2.5, float('inf')],
}

# Example usage:
from sklearn.datasets import load_iris
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import Matern

X, y = load_iris(return_X_y=True)
kernel = 1.0 * Matern(length_scale=1.0, nu=1.5)
gpc = GaussianProcessClassifier(kernel=kernel, random_state=0).fit(X, y)
print(gpc.score(X, y))  # 0.9866...
print(gpc.predict_proba(X[:2, :]))  # Predicted probabilities for the first two samples.


### White Kernel

In [None]:
import sklearn.gaussian_process

"""
The WhiteKernel is used to model noise in Gaussian Process regression. It represents the noise level as a 
constant, which is independent of the input data.

Mathematically, the White kernel is defined as:     k(x_1, x_2) = noise_level if x_1 == x_2 else 0

This means that the kernel value is equal to `noise_level` when both inputs are the same (diagonal elements), 
and 0 when they are different (off-diagonal elements).

The main use-case of this kernel is as part of a sum-kernel, where it captures the noise or variance of the signal, 
modeled as independently and identically normally distributed (i.i.d.). 
The parameter `noise_level` controls the variance of the noise.
"""


white_kernel = sklearn.gaussian_process.kernels.WhiteKernel(
    # Variance of the i.i.d. noise. Default: 1.0.
    # Higher values attribute more of the signal to noise.
    noise_level=1.0,
    # Search range for noise_level during optimization.
    # The library default is (1e-5, 1e5); "fixed" is chosen here to freeze
    # noise_level at the value given above.
    noise_level_bounds="fixed",
)

# Candidate values for a search over the WhiteKernel.
WhiteKernel_hyperparameters = {
    # Noise variance: small for nearly noise-free data, large for noisy data.
    "noise_level": [0.1, 1.0, 10.0],
    # Range noise_level may take during optimization.
    "noise_level_bounds": [(1e-5, 1e5)],
}


# Example usage:
from sklearn.datasets import make_friedman2
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel

X, y = make_friedman2(n_samples=500, noise=0, random_state=0)

# Linear signal plus independent observation noise.
kernel = DotProduct() + WhiteKernel(noise_level=0.5)

# fit() returns the estimator itself, so fitting on its own line is equivalent.
gpr = GaussianProcessRegressor(kernel=kernel, random_state=0)
gpr.fit(X, y)

print(gpr.score(X, y))  # 0.3680...
print(gpr.predict(X[:2], return_std=True))  # Predicted mean and standard deviation.


### Pairwise Kernel


In [None]:
"""
The PairwiseKernel is a thin wrapper that lets a Gaussian process use any of the
kernel functions from `sklearn.metrics.pairwise` (linear, RBF, polynomial, ...).

Only `gamma` is treated as a hyperparameter and may be optimized during fitting;
any further parameters of the chosen metric are passed through
`pairwise_kernels_kwargs` and stay fixed.
"""

import sklearn.gaussian_process

pairwise_kernel = sklearn.gaussian_process.kernels.PairwiseKernel(
    # Parameter gamma of the chosen metric; must be positive. Default: 1.0.
    # Its effect depends on the metric. For 'rbf', k = exp(-gamma * ||x - y||^2),
    # so larger values make the kernel more local (more flexible fits) and
    # smaller values make it smoother.
    gamma=1.0,
    # (lower, upper) search range for gamma during optimization. Default: (1e-5, 1e5).
    gamma_bounds=(1e-5, 1e5),
    # Pairwise kernel function used to compare samples. Default: 'linear'.
    metric='rbf',
)

# Candidate values for a search over the PairwiseKernel.
PairwiseKernel_hyperparameters = {
    # Scale parameter of the metric (see the note on gamma above).
    "gamma": [0.1, 1.0, 10.0],
    # Available pairwise kernel functions ("poly" and "polynomial" are aliases).
    "metric": ["linear", "additive_chi2", "chi2", "poly", "polynomial", "rbf", "laplacian", "sigmoid", "cosine"],
    # Ranges gamma may take during optimization.
    "gamma_bounds": [(1e-5, 1e2), (1e-2, 1e3)],
}

"""
1.Linear Kernel                 : For linearly separable data.
2.Additive Chi-Square Kernel    : For count or frequency data (non-negative features, e.g. histograms).
3.Chi-Square Kernel             : For count or frequency data; exponentiated variant of additive_chi2.
4.Polynomial Kernel             : When data has polynomial relationships.
5.RBF Kernel                    : For non-linear relationships with unknown data distributions.
6.Laplacian Kernel              : For emphasizing local similarities and handling sparse data.
7.Sigmoid Kernel                : For neural network-like data structures or binary classification.
8.Cosine Kernel                 : For measuring vector similarity, especially in text or high-dimensional spaces.
"""



# Example usage:
from sklearn.datasets import load_iris
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import PairwiseKernel

X, y = load_iris(return_X_y=True)
kernel = PairwiseKernel(metric='rbf')  # Use the RBF pairwise kernel function
gpc = GaussianProcessClassifier(kernel=kernel, random_state=0).fit(X, y)
print(gpc.score(X, y))  # 0.9733...
print(gpc.predict_proba(X[:2, :]))  # Predicted probabilities for the first two samples.


### RationalQuadratic Kernel

In [None]:
import sklearn.gaussian_process

"""
The RationalQuadratic kernel is used for modeling data with varying smoothness. It can be seen as a scale mixture
(an infinite sum) of RBF kernels with different length scales, allowing it to model data that varies on several
scales at once.

The kernel is defined as:

k(x_i, x_j) = (1 + (d(x_i, x_j)^2) / (2αl^2))^(-α)


Where:
- alpha (α) is the scale mixture parameter (controls the relative weight of the different scales),
- l is the length scale (determines the smoothness of the kernel),
- d(x_i, x_j) is the Euclidean distance between input points x_i and x_j.

This kernel is useful for situations where data has long-range correlations but with varying smoothness over different ranges.
Only the isotropic variant (scalar length_scale) is supported.
"""
RationalQuadratic_kernel = sklearn.gaussian_process.kernels.RationalQuadratic(
    # Length scale of the kernel. Default: 1.0.
    # Smaller values make the kernel more sensitive to local variation; larger values give smoother models.
    length_scale=1.0,
    # Scale mixture parameter. Default: 1.0.
    # Small values mix many different length scales; as alpha grows the kernel approaches a single-scale RBF.
    alpha=1.0,
    # (lower, upper) search range for length_scale during optimization. Default: (1e-5, 1e5).
    length_scale_bounds=(1e-5, 1e5),
    # (lower, upper) search range for alpha during optimization. Default: (1e-5, 1e5).
    alpha_bounds=(1e-5, 1e5),
)

# Candidate values for a search over the RationalQuadratic kernel.
RationalQuadratic_hyperparameters = {
    # Length scale: smaller values follow the data more closely.
    "length_scale": [0.1, 1.0, 10.0],
    # Scale mixture: smaller values mix more length scales.
    "alpha": [0.5, 1.0, 2.0],
    # Ranges length_scale may take during optimization.
    "length_scale_bounds": [(1e-5, 1e2), (1e-2, 1e3)],
    # Ranges alpha may take during optimization.
    "alpha_bounds": [(1e-5, 1e2), (1e-2, 1e3)],
}




# Example usage:
from sklearn.datasets import load_iris
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RationalQuadratic

X, y = load_iris(return_X_y=True)
kernel = RationalQuadratic(length_scale=1.0, alpha=1.5)
gpc = GaussianProcessClassifier(kernel=kernel, random_state=0).fit(X, y)
print(gpc.score(X, y))  # Example output: 0.9733...
print(gpc.predict_proba(X[:2, :]))  # Predicted probabilities for the first two data points.
