In [None]:
https://sci-hub.se/10.1016/j.conbuildmat.2020.118803
https://www.researchgate.net/publication/271949910_An_Artificial_Neural_Networks_Model_for_Predicting_Permeability_Properties_of_Nano_Silica-Rice_Husk_Ash_Ternary_Blended_Concrete

In [None]:
import numpy as np
import pandas as pd
from scipy.stats import lognorm, norm
from scipy.spatial.distance import pdist, cdist, squareform
from sklearn.model_selection import train_test_split
from sklearn.gaussian_process import GaussianProcessRegressor as GPR
from sklearn.gaussian_process import kernels as gpk
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler, LabelEncoder, OneHotEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import plotly
from plotly import tools
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
class SpectralMixture(gpk.Kernel):
    """Spectral-mixture kernel: a weighted sum of ``q`` cosine-times-Gaussian terms.

    With ``tau = x - y`` the kernel value is
    ``sum_k w[k] * cos(2*pi * tau @ m[:, k]) * exp(-2*pi**2 * tau**2 @ v[:, k])``.

    Parameters
    ----------
    q : int
        Number of mixture components.
    w : ndarray with ``q`` entries
        Component weights.
    m, v : ndarray with ``d * q`` entries each
        Component means and variances; reshaped to ``(d, q)`` on evaluation.
    d : int
        Number of input columns the kernel operates on.
    active_dim : sequence of int or None
        Columns of the input to use; ``None`` keeps every column.
    """

    def __init__(self, q, w, m, v, d, active_dim=None):
        self.q, self.w, self.m, self.v, self.d = q, w, m, v, d
        self.active_dim = active_dim

    @property
    def anisotropic(self):
        return False

    # NOTE(review): the third positional argument of gpk.Hyperparameter is
    # ``bounds``; the current parameter values are passed there. Confirm whether
    # real bounds were intended before enabling hyperparameter optimisation.
    @property
    def hyperparameter_variance(self):
        return gpk.Hyperparameter("v", "numeric", self.v.ravel(), len(self.v.ravel()))

    @property
    def hyperparameter_mean(self):
        return gpk.Hyperparameter("m", "numeric", self.m.ravel(), len(self.m.ravel()))

    @property
    def hyperparameter_weight(self):
        return gpk.Hyperparameter("w", "numeric", self.w.ravel(), len(self.w.ravel()))

    def __call__(self, X, Y=None, eval_gradient=False):
        """Return the Gram matrix k(X, Y) (k(X, X) when ``Y`` is None).

        ``eval_gradient`` is accepted for API compatibility but ignored: only
        the kernel matrix is returned.
        """
        # Use the instance's own component count; the original compared
        # against a bare ``q`` that is not defined inside this method.
        q = self.q
        w = np.asarray(self.w)[:, np.newaxis]
        m = np.reshape(self.m, (self.d, q))
        v = np.reshape(self.v, (self.d, q))
        assert w.shape == (q, 1), 'Weights must be [q x 1]'
        assert m.shape[1] == q
        assert v.shape[1] == q

        X = np.atleast_2d(X)
        X = X[:, self.active_dim] if self.active_dim is not None else X
        if Y is None:
            Y = X
        else:
            Y = np.atleast_2d(Y)
            Y = Y[:, self.active_dim] if self.active_dim is not None else Y
        # Pairwise differences, shape (n_X, n_Y, d).
        tau = X[:, np.newaxis, :] - Y

        # tau(n_X, n_Y, d) tensordot means(d, q) -> (n_X, n_Y, q),
        # i.e. entry [i, j, k] = tau[i, j] . m[:, k]
        K = np.cos(2 * np.pi * np.tensordot(tau, m, axes=1)) * \
            np.exp(-2 * np.pi ** 2 * np.tensordot(tau ** 2, v, axes=1))

        # Weighted sum over the component axis, then drop that axis.
        return np.tensordot(K, w, axes=1).squeeze(axis=(2,))

    def diag(self, X):
        """Return the diagonal of k(X, X) (computed from the full matrix)."""
        return np.diag(self(X))

    def is_stationary(self):
        """Returns whether the kernel is stationary. """
        return True

    def __repr__(self):
        # Flatten first so that 2-D mean/variance arrays format element-wise.
        fmt = "{0:.3g}".format
        return "{0}(weight=[{1}], mean=[{2}], variance=[{3}])".format(
            self.__class__.__name__, ", ".join(map(fmt, np.ravel(self.w))),
            ", ".join(map(fmt, np.ravel(self.m))), ", ".join(map(fmt, np.ravel(self.v))))
        
class PiecewisePolynomial(gpk.Kernel):
    """Compactly supported piecewise-polynomial kernel on city-block distances.

    Parameters
    ----------
    length_scale : float or sequence of float
        Scale(s) the inputs are divided by before distances are taken.
    q : number
        Polynomial order; rounded to the nearest integer on evaluation.
        Orders 0, 1, 2 and 3 are implemented.
    active_dim : sequence of int or None
        Columns of the input to use; ``None`` keeps every column.
    """

    def __init__(self, length_scale, q=0, active_dim=None):
        self.q = q
        self.length_scale = length_scale
        self.active_dim = active_dim
        if active_dim is not None and self.anisotropic:
            assert len(self.active_dim) == len(
                self.length_scale), 'length_scale and active_dim must have the same length'

    @property
    def anisotropic(self):
        return np.iterable(self.length_scale) and len(self.length_scale) > 1

    # NOTE(review): the third positional argument of gpk.Hyperparameter is
    # ``bounds``; the current values are passed there. Confirm the intent.
    @property
    def hyperparameter_q(self):
        return gpk.Hyperparameter("q", "numeric", self.q)

    @property
    def hyperparameter_length_scale(self):
        if self.anisotropic:
            return gpk.Hyperparameter("length_scale", "numeric", self.length_scale, len(self.length_scale))
        return gpk.Hyperparameter("length_scale", "numeric", self.length_scale)

    def fmax(self, r, j, q):
        """Compact-support factor ``max(0, 1 - r) ** (j + q)``."""
        return np.power(np.maximum(0.0, 1 - r), j + q)

    def get_cov(self, r, j, q):
        """Polynomial factor for integer order ``q``.

        Raises
        ------
        ValueError
            If ``q`` is not one of 0, 1, 2, 3.
        """
        if q == 0:
            return 1
        if q == 1:
            return (j + 1) * r + 1
        if q == 2:
            return 1 + (j + 2) * r + ((j ** 2 + 4 * j + 3) / 3.0) * r ** 2
        if q == 3:
            return (
                    1
                    + (j + 3) * r
                    + ((6 * j ** 2 + 36 * j + 45) / 15.0) * r ** 2
                    + ((j ** 3 + 9 * j ** 2 + 23 * j + 15) / 15.0) * r ** 3
            )
        else:
            raise ValueError("Requested kernel q is not implemented.")

    def __call__(self, X, Y=None, eval_gradient=False):
        """Return the Gram matrix k(X, Y) (k(X, X) when ``Y`` is None).

        ``eval_gradient`` is accepted for API compatibility but ignored.
        """
        # q may be a float (e.g. after a log/exp round trip); snap it to an integer.
        q = int(np.round(self.q))
        X = np.atleast_2d(X)
        X = X[:, self.active_dim] if self.active_dim is not None else X
        if Y is None:
            r = pdist(X / self.length_scale, metric="cityblock")
            r = squareform(r)
        else:
            Y = np.atleast_2d(Y)
            Y = Y[:, self.active_dim] if self.active_dim is not None else Y
            r = cdist(X / self.length_scale, Y / self.length_scale, metric="cityblock")
        j = np.floor(X.shape[1] / 2.0) + q + 1
        # Use the same rounded q in both factors; the original passed the raw
        # self.q to fmax, so a non-integer q mixed two different orders.
        return self.fmax(r, j, q) * self.get_cov(r, j, q)

    def diag(self, X):
        """Return the diagonal of k(X, X) (computed from the full matrix)."""
        return np.diag(self(X))

    def is_stationary(self):
        """Returns whether the kernel is stationary. """
        return True

    def __repr__(self):
        if self.anisotropic:
            return "{0}(q={1:.3g}, length_scale=[{2}])".format(
                self.__class__.__name__, self.q, ", ".join(map("{0:.3g}".format, self.length_scale)))
        else:  # isotropic
            return "{0}(q={1:.3g}, length_scale={2:.3g})".format(
                self.__class__.__name__, self.q, self.length_scale)

class Polynomial(gpk.Kernel):
    """Polynomial kernel ``k(x, y) = ((x * variance) . y + offset) ** degree``.

    Parameters
    ----------
    variance : float or sequence of float
        Per-column (or shared) multiplier applied to ``X`` before the dot product.
    offset : float
        Constant added to the dot product.
    degree : float
        Exponent applied to the result.
    active_dim : sequence of int or None
        Columns of the input to use; ``None`` keeps every column.
    """

    def __init__(self, variance=1.0, offset=0.0, degree=1.0, active_dim=None):
        self.degree = degree
        self.variance = variance
        self.offset = offset
        self.active_dim = active_dim
        if active_dim is not None and self.anisotropic:
            assert len(self.active_dim) == len(self.variance), 'variance and active_dim must have the same length'

    @property
    def anisotropic(self):
        return np.iterable(self.variance) and len(self.variance) > 1

    # Each hyperparameter gets its own property name. Previously two properties
    # shared the name ``hyperparameter_periodicity``, so the later definition
    # shadowed the earlier one and "degree" was never exposed.
    # NOTE(review): the third positional argument of gpk.Hyperparameter is
    # ``bounds``; the current values are passed there. Confirm the intent.
    @property
    def hyperparameter_degree(self):
        return gpk.Hyperparameter("degree", "numeric", self.degree)

    @property
    def hyperparameter_offset(self):
        return gpk.Hyperparameter("offset", "numeric", self.offset)

    @property
    def hyperparameter_variance(self):
        if self.anisotropic:
            return gpk.Hyperparameter("variance", "numeric", self.variance, len(self.variance))
        return gpk.Hyperparameter("variance", "numeric", self.variance)

    def __call__(self, X, Y=None, eval_gradient=False):
        """Return the Gram matrix k(X, Y) (k(X, X) when ``Y`` is None).

        ``eval_gradient`` is accepted for API compatibility but ignored.
        """
        X = np.atleast_2d(X)
        X = X[:, self.active_dim] if self.active_dim is not None else X
        if Y is None:
            Y = X
        else:
            Y = np.atleast_2d(Y)
            Y = Y[:, self.active_dim] if self.active_dim is not None else Y
        return (np.matmul(X * self.variance, Y.T) + self.offset) ** self.degree

    def diag(self, X):
        """Return the diagonal of k(X, X) (computed from the full matrix)."""
        return np.diag(self(X))

    def is_stationary(self):
        """Returns whether the kernel is stationary. """
        return False

    def __repr__(self):
        if self.anisotropic:
            return "{0}(variance=[{1}], offset={2:.3g}, degree={3:.3g})".format(
                self.__class__.__name__, ", ".join(map("{0:.3g}".format, self.variance)), self.offset, self.degree)
        else:  # isotropic
            return "{0}(variance={1:.3g}, offset={2:.3g}, degree={3:.3g})".format(
                self.__class__.__name__, self.variance, self.offset, self.degree)


class Brownian(gpk.Kernel):
    """Brownian-motion kernel on a single input column.

    ``k(x, y) = variance * min(|x|, |y|)`` when ``x`` and ``y`` have the same
    sign, and 0 otherwise.

    Parameters
    ----------
    variance : float
        Scale of the kernel.
    active_dim : sequence with exactly one int, or None
        Column of the input to use. ``None`` uses the input as given.

    Raises
    ------
    ValueError
        If ``active_dim`` is given and does not select exactly one column.
    """

    def __init__(self, variance=1.0, active_dim=None):
        # The default active_dim=None must not be passed to len().
        if active_dim is not None and len(active_dim) != 1:
            raise ValueError("Input dimensional for Brownian kernel must be 1.")
        self.variance = variance
        self.active_dim = active_dim

    # NOTE(review): the third positional argument of gpk.Hyperparameter is
    # ``bounds``; the current value is passed there. Confirm the intent.
    @property
    def hyperparameter_variance(self):
        return gpk.Hyperparameter("variance", "numeric", self.variance)

    def __call__(self, X, Y=None, eval_gradient=False):
        """Return the Gram matrix k(X, Y) (k(X, X) when ``Y`` is None).

        ``eval_gradient`` is accepted for API compatibility but ignored.
        """
        X = np.atleast_2d(X)
        X = X[:, self.active_dim] if self.active_dim is not None else X
        if Y is None:
            Y = X
        else:
            Y = np.atleast_2d(Y)
            Y = Y[:, self.active_dim] if self.active_dim is not None else Y

        # Column X against row Y.T broadcasts to the pairwise matrix.
        same_sign = np.sign(X) == np.sign(Y.T)
        return np.where(same_sign, self.variance * np.fmin(np.abs(X), np.abs(Y.T)), 0.)

    def diag(self, X):
        """Return the diagonal of k(X, X) (computed from the full matrix)."""
        return np.diag(self(X))

    def is_stationary(self):
        """Returns whether the kernel is stationary. """
        return False

    def __repr__(self):
        return "{0}(variance={1:.3g})".format(self.__class__.__name__, self.variance)


class ArcCosine(gpk.Kernel):
    """Arc-cosine kernel of order 0, 1 or 2.

    Parameters
    ----------
    order : int
        Kernel order; must be in ``implemented_orders``.
    variance : float
        Overall scale of the kernel.
    weight_variances : float or sequence of float
        Per-column (or shared) weights used in the weighted inner product.
    bias_variance : float
        Constant added to every weighted inner product.
    active_dim : sequence of int or None
        Columns of the input to use; ``None`` keeps every column.

    Raises
    ------
    ValueError
        If ``order`` is not implemented.
    """
    implemented_orders = {0, 1, 2}

    def __init__(self, order=0, variance=1.0, weight_variances=1.0, bias_variance=1.0, active_dim=None):
        if order not in self.implemented_orders:
            raise ValueError("Requested kernel order is not implemented.")
        self.order = order
        self.variance = variance
        self.bias_variance = bias_variance
        self.weight_variances = weight_variances
        self.active_dim = active_dim
        if active_dim is not None and self.anisotropic:
            assert len(self.active_dim) == len(
                self.weight_variances), 'weight_variances and active_dim must have the same length'

    @property
    def anisotropic(self):
        return np.iterable(self.weight_variances) and len(self.weight_variances) > 1

    # NOTE(review): the third positional argument of gpk.Hyperparameter is
    # ``bounds``; the current values are passed there. Confirm the intent.
    @property
    def hyperparameter_variance(self):
        return gpk.Hyperparameter("variance", "numeric", self.variance)

    @property
    def hyperparameter_weight_variances(self):
        if self.anisotropic:
            return gpk.Hyperparameter("weight_variances", "numeric", self.weight_variances, len(self.weight_variances))
        return gpk.Hyperparameter("weight_variances", "numeric", self.weight_variances)

    @property
    def hyperparameter_bias_variance(self):
        return gpk.Hyperparameter("bias_variance", "numeric", self.bias_variance)

    def _weighted_product(self, X, X2=None):
        """Weighted inner product plus bias.

        With ``X2`` omitted, returns the per-row value ``sum(w * x**2) + b``
        (1-D); otherwise the pairwise matrix ``(w * X) @ X2.T + b``.
        """
        if X2 is None:
            return np.sum(self.weight_variances * X ** 2, axis=1) + self.bias_variance
        return np.matmul((self.weight_variances * X), X2.T) + self.bias_variance

    def _J(self, theta):
        """
        Implements the order dependent family of functions defined in equations
        4 to 7 in the reference paper.
        """
        if self.order == 0:
            return np.pi - theta
        elif self.order == 1:
            return np.sin(theta) + (np.pi - theta) * np.cos(theta)
        elif self.order == 2:
            return 3.0 * np.sin(theta) * np.cos(theta) + (np.pi - theta) * (
                    1.0 + 2.0 * np.cos(theta) ** 2)

    def __call__(self, X, Y=None, eval_gradient=False):
        """Return the Gram matrix k(X, Y) (k(X, X) when ``Y`` is None).

        ``eval_gradient`` is accepted for API compatibility but ignored.
        """
        X = np.atleast_2d(X)
        X = X[:, self.active_dim] if self.active_dim is not None else X
        X_denominator = np.sqrt(self._weighted_product(X))
        if Y is None:
            Y = X
            Y_denominator = X_denominator
        else:
            Y = np.atleast_2d(Y)
            Y = Y[:, self.active_dim] if self.active_dim is not None else Y
            Y_denominator = np.sqrt(self._weighted_product(Y))

        numerator = self._weighted_product(X, Y)
        cos_theta = numerator / X_denominator[:, None] / Y_denominator[None, :]
        jitter = 1e-15
        # Round-off can push the ratio marginally outside [-1, 1], where arccos
        # returns NaN; clipping leaves every in-range value untouched.
        theta = np.arccos(np.clip(jitter + (1 - 2 * jitter) * cos_theta, -1.0, 1.0))

        norm_x = X_denominator[:, None] ** self.order
        norm_y = Y_denominator[None, :] ** self.order
        return self.variance * (1.0 / np.pi) * self._J(theta) * norm_x * norm_y

    def diag(self, X):
        """Return the diagonal of k(X, X) (computed from the full matrix)."""
        return np.diag(self(X))

    def is_stationary(self):
        """Returns whether the kernel is stationary. """
        return False

    def __repr__(self):
        if self.anisotropic:
            return "{0}(variance={1:.3g}, weight_variances=[{2}], bias_variance={3:.3g})".format(
                self.__class__.__name__, self.variance, ", ".join(map("{0:.3g}".format, self.weight_variances)),
                self.bias_variance)
        else:  # isotropic
            # bias_variance is argument 3; the original format string reused
            # index 2 and therefore printed weight_variances twice.
            return "{0}(variance={1:.3g}, weight_variances={2:.3g}, bias_variance={3:.3g})".format(
                self.__class__.__name__, self.variance, self.weight_variances, self.bias_variance)


class Gibbs(gpk.Kernel):
    """Gibbs kernel with an input-dependent length-scale function.

    ``k(x, y) = sqrt(2 l(x) l(y) / (l(x)**2 + l(y)**2))
    * exp(-||x - y||**2 / (l(x)**2 + l(y)**2))``

    Parameters
    ----------
    lfunc : callable
        Called as ``lfunc(X, **args)``.
        NOTE(review): assumed to return one length-scale per row — confirm.
    args : dict
        Keyword arguments forwarded to ``lfunc``.
    active_dim : sequence of int or None
        Columns of the input to use; ``None`` keeps every column.
    """

    def __init__(self, lfunc, args, active_dim=None):
        self.lfunc = lfunc
        self.args = args
        self.active_dim = active_dim

    def __call__(self, X, Y=None, eval_gradient=False):
        """Return the Gram matrix k(X, Y) (k(X, X) when ``Y`` is None).

        ``eval_gradient`` is accepted for API compatibility but ignored.
        """
        X = np.atleast_2d(X)
        X = X[:, self.active_dim] if self.active_dim is not None else X
        rx = self.lfunc(X, **self.args)
        if Y is None:
            rz = self.lfunc(X, **self.args)
            # The self-distance must stay 0 so that k(x, x) == 1, matching
            # diag(); the original overwrote the diagonal distances with 1.
            dists = squareform(pdist(X, metric='sqeuclidean'))
        else:
            Y = np.atleast_2d(Y)
            Y = Y[:, self.active_dim] if self.active_dim is not None else Y
            rz = self.lfunc(Y, **self.args)
            dists = cdist(X, Y, metric='sqeuclidean')

        rx2, rz2 = np.reshape(rx ** 2, (-1, 1)), np.reshape(rz ** 2, (1, -1))
        return np.sqrt((2.0 * np.outer(rx, rz)) / (rx2 + rz2)) * np.exp(-1.0 * dists / (rx2 + rz2))

    def diag(self, X):
        """Return the diagonal of k(X, X), which is identically 1."""
        # np.alloc does not exist in NumPy; build the vector of ones directly.
        return np.ones(np.atleast_2d(X).shape[0])

    def is_stationary(self):
        """Returns whether the kernel is stationary. """
        return False

    def __repr__(self):
        # The class defines no ``anisotropic`` attribute, so return the name
        # unconditionally (and always return a string).
        return "{0}".format(self.__class__.__name__)


class WarpedInput(gpk.Kernel):
    """Kernel that transforms its inputs with ``func`` and then delegates.

    ``X`` (and ``Y`` when given) are made 2-D, restricted to ``active_dim``
    when it is set, passed through ``func(data, **args)`` and finally handed
    to the wrapped ``stationary`` kernel.
    """

    def __init__(self, stationary, func, args, active_dim=None):
        self.active_dim = active_dim
        self.args = args
        self.func = func
        self.stationary = stationary

    def _warp(self, data):
        """Select the active columns of ``data`` and apply the warping function."""
        data = np.atleast_2d(data)
        if self.active_dim is not None:
            data = data[:, self.active_dim]
        return self.func(data, **self.args)

    def __call__(self, X, Y=None, eval_gradient=False):
        """Evaluate the wrapped kernel on the warped inputs."""
        warped_x = self._warp(X)
        warped_y = self._warp(Y) if Y is not None else None
        return self.stationary(warped_x, warped_y, eval_gradient)

    def diag(self, X):
        """Return the diagonal of k(X, X) (computed from the full matrix)."""
        gram = self(X)
        return np.diag(gram)

    def is_stationary(self):
        """Returns whether the kernel is stationary. """
        return False

    def __repr__(self):
        return ''


class Gabor(gpk.Kernel):
    """Gabor-style kernel: a stationary envelope times a cosine of the distance.

    ``k(x, y) = envelope(x, y) * cos(2*pi * ||x - y|| / periodicity)`` where
    ``envelope`` is ``stationary(length_scale=length_scale)``.

    Parameters
    ----------
    stationary : callable
        Kernel factory; called as ``stationary(length_scale=...)`` on every
        evaluation to build the envelope kernel.
    length_scale : float or sequence of float
        Length-scale(s) passed to the envelope kernel.
    periodicity : float
        Period of the cosine term, in units of Euclidean input distance.
    active_dim : sequence of int or None
        Columns of the input to use; ``None`` keeps every column.
    """

    def __init__(self, stationary, length_scale=1.0, periodicity=1.0, active_dim=None):
        self.stationary = stationary
        self.length_scale = length_scale
        self.periodicity = periodicity
        self.active_dim = active_dim
        if active_dim is not None and self.anisotropic:
            assert len(self.active_dim) == len(
                self.length_scale), 'length_scale and active_dim must have the same length'

    @property
    def anisotropic(self):
        return np.iterable(self.length_scale) and len(self.length_scale) > 1

    # NOTE(review): the third positional argument of gpk.Hyperparameter is
    # ``bounds``; the current values are passed there. Confirm the intent.
    @property
    def hyperparameter_periodicity(self):
        return gpk.Hyperparameter("periodicity", "numeric", self.periodicity)

    @property
    def hyperparameter_length_scale(self):
        if self.anisotropic:
            return gpk.Hyperparameter("length_scale", "numeric", self.length_scale, len(self.length_scale))
        return gpk.Hyperparameter("length_scale", "numeric", self.length_scale)

    def __call__(self, X, Y=None, eval_gradient=False):
        """Return the Gram matrix k(X, Y) (k(X, X) when ``Y`` is None).

        ``eval_gradient`` is forwarded to the envelope kernel unchanged.
        NOTE(review): if the envelope then returns a (K, gradient) pair, the
        product below will not handle it; gradients look unsupported — confirm.
        """
        envelope_kernel = self.stationary(length_scale=self.length_scale)
        X = np.atleast_2d(X)
        X = X[:, self.active_dim] if self.active_dim is not None else X
        if Y is None:
            # Self-distances stay 0 so the cosine term is 1 on the diagonal;
            # the original overwrote the diagonal distances with 1.
            dists = squareform(pdist(X, metric='euclidean'))
        else:
            Y = np.atleast_2d(Y)
            Y = Y[:, self.active_dim] if self.active_dim is not None else Y
            dists = cdist(X, Y, metric='euclidean')
        envelope = envelope_kernel(X, Y, eval_gradient)

        # Plain Euclidean distance: for a scalar length_scale this equals the
        # original "scaled distance * length_scale", and it stays well defined
        # when length_scale is a vector.
        phase = 2 * np.pi * dists / self.periodicity
        return envelope * np.cos(phase)

    def diag(self, X):
        """Return the diagonal of k(X, X) (computed from the full matrix)."""
        return np.diag(self(X))

    def is_stationary(self):
        """Returns whether the kernel is stationary. """
        return True

    def __repr__(self):
        if self.anisotropic:
            return "{0}(length_scale=[{1}], periodicity={2:.3g})".format(
                self.__class__.__name__, ", ".join(map("{0:.3g}".format, self.length_scale)), self.periodicity)
        else:  # isotropic
            return "{0}(length_scale={1:.3g}, periodicity={2:.3g})".format(
                self.__class__.__name__, self.length_scale, self.periodicity)


class ConstantKernel(gpk.ConstantKernel):
    """``gpk.ConstantKernel`` that can be restricted to a subset of input columns.

    ``active_dim`` (sequence of column indices, or ``None`` for all columns)
    is applied to ``X`` and ``Y`` before the parent kernel is evaluated.
    """

    def __init__(self, constant_value=1.0, constant_value_bounds=(1e-5, 1e5), active_dim=None):
        super().__init__(constant_value=constant_value, constant_value_bounds=constant_value_bounds)
        self.active_dim = active_dim

    def __call__(self, X, Y=None, eval_gradient=False):
        """Evaluate the parent kernel on the active columns of ``X`` / ``Y``."""
        # Identity test instead of ``== None``: with a NumPy index array ``==``
        # is element-wise and cannot be used as a condition.
        if self.active_dim is not None:
            X = np.atleast_2d(X)[:, self.active_dim]
            if Y is not None:
                Y = np.atleast_2d(Y)[:, self.active_dim]
        return super().__call__(X, Y, eval_gradient)


class Matern(gpk.Matern):
    """``gpk.Matern`` that can be restricted to a subset of input columns.

    ``active_dim`` (sequence of column indices, or ``None`` for all columns)
    is applied to ``X`` and ``Y`` before the parent kernel is evaluated. With
    a vector ``length_scale`` its length must equal ``len(active_dim)``.
    """

    def __init__(self, length_scale=1.0, length_scale_bounds=(1e-5, 1e5), nu=1.5, active_dim=None):
        super().__init__(length_scale=length_scale, length_scale_bounds=length_scale_bounds, nu=nu)
        self.active_dim = active_dim
        if active_dim is not None and self.anisotropic:
            # Message names the parameter actually being checked.
            assert len(self.active_dim) == len(
                self.length_scale), 'length_scale and active_dim must have the same length'

    @property
    def anisotropic(self):
        return np.iterable(self.length_scale) and len(self.length_scale) > 1

    def __call__(self, X, Y=None, eval_gradient=False):
        """Evaluate the parent kernel on the active columns of ``X`` / ``Y``."""
        # Identity test instead of ``== None``: with a NumPy index array ``==``
        # is element-wise and cannot be used as a condition.
        if self.active_dim is not None:
            X = np.atleast_2d(X)[:, self.active_dim]
            if Y is not None:
                Y = np.atleast_2d(Y)[:, self.active_dim]
        return super().__call__(X, Y, eval_gradient)


class RationalQuadratic(gpk.RationalQuadratic):
    """``gpk.RationalQuadratic`` that can be restricted to a subset of input columns.

    ``active_dim`` (sequence of column indices, or ``None`` for all columns)
    is applied to ``X`` and ``Y`` before the parent kernel is evaluated.
    """

    def __init__(self, length_scale=1.0, alpha=1.0, length_scale_bounds=(1e-05, 100000.0), alpha_bounds=(1e-05, 100000.0),
                 active_dim=None):
        super().__init__(length_scale=length_scale, length_scale_bounds=length_scale_bounds, alpha=alpha, alpha_bounds=alpha_bounds)
        self.active_dim = active_dim
        if active_dim is not None and self.anisotropic:
            # Message names the parameter actually being checked.
            assert len(self.active_dim) == len(
                self.length_scale), 'length_scale and active_dim must have the same length'

    @property
    def anisotropic(self):
        return np.iterable(self.length_scale) and len(self.length_scale) > 1

    def __call__(self, X, Y=None, eval_gradient=False):
        """Evaluate the parent kernel on the active columns of ``X`` / ``Y``."""
        # Identity test instead of ``== None``: with a NumPy index array ``==``
        # is element-wise and cannot be used as a condition.
        if self.active_dim is not None:
            X = np.atleast_2d(X)[:, self.active_dim]
            if Y is not None:
                Y = np.atleast_2d(Y)[:, self.active_dim]
        return super().__call__(X, Y, eval_gradient)
        

class RBF(gpk.RBF):
    """``gpk.RBF`` that can be restricted to a subset of input columns.

    ``active_dim`` (sequence of column indices, or ``None`` for all columns)
    is applied to ``X`` and ``Y`` before the parent kernel is evaluated. With
    a vector ``length_scale`` its length must equal ``len(active_dim)``.
    """

    def __init__(self, length_scale=1.0, length_scale_bounds=(1e-5, 1e5), active_dim=None):
        super().__init__(length_scale=length_scale, length_scale_bounds=length_scale_bounds)
        self.active_dim = active_dim
        if active_dim is not None and self.anisotropic:
            # Message names the parameter actually being checked.
            assert len(self.active_dim) == len(
                self.length_scale), 'length_scale and active_dim must have the same length'

    @property
    def anisotropic(self):
        return np.iterable(self.length_scale) and len(self.length_scale) > 1

    def __call__(self, X, Y=None, eval_gradient=False):
        """Evaluate the parent kernel on the active columns of ``X`` / ``Y``."""
        # Identity test instead of ``== None``: with a NumPy index array ``==``
        # is element-wise and cannot be used as a condition.
        if self.active_dim is not None:
            X = np.atleast_2d(X)[:, self.active_dim]
            if Y is not None:
                Y = np.atleast_2d(Y)[:, self.active_dim]
        return super().__call__(X, Y, eval_gradient)


class ExpSineSquared(gpk.ExpSineSquared):
    """``gpk.ExpSineSquared`` that can be restricted to a subset of input columns.

    ``active_dim`` (sequence of column indices, or ``None`` for all columns)
    is applied to ``X`` and ``Y`` before the parent kernel is evaluated.
    """

    def __init__(self, length_scale=1.0, periodicity=1.0, length_scale_bounds=(1e-5, 1e5),
                 periodicity_bounds=(1e-5, 1e5), active_dim=None):
        super().__init__(length_scale=length_scale, periodicity=periodicity, length_scale_bounds=length_scale_bounds,
                         periodicity_bounds=periodicity_bounds)
        self.active_dim = active_dim
        if active_dim is not None and self.anisotropic:
            # Message names the parameter actually being checked.
            assert len(self.active_dim) == len(
                self.length_scale), 'length_scale and active_dim must have the same length'

    @property
    def anisotropic(self):
        return np.iterable(self.length_scale) and len(self.length_scale) > 1

    def __call__(self, X, Y=None, eval_gradient=False):
        """Evaluate the parent kernel on the active columns of ``X`` / ``Y``."""
        # Identity test instead of ``== None``: with a NumPy index array ``==``
        # is element-wise and cannot be used as a condition.
        if self.active_dim is not None:
            X = np.atleast_2d(X)[:, self.active_dim]
            if Y is not None:
                Y = np.atleast_2d(Y)[:, self.active_dim]
        return super().__call__(X, Y, eval_gradient)


class WhiteKernel(gpk.WhiteKernel):
    """``gpk.WhiteKernel`` that can be restricted to a subset of input columns.

    ``active_dim`` (sequence of column indices, or ``None`` for all columns)
    is applied to ``X`` and ``Y`` before the parent kernel is evaluated.
    """

    def __init__(self, noise_level=1.0, noise_level_bounds=(1e-05, 100000.0), active_dim=None):
        super().__init__(noise_level=noise_level, noise_level_bounds=noise_level_bounds)
        self.active_dim = active_dim

    def __call__(self, X, Y=None, eval_gradient=False):
        """Evaluate the parent kernel on the active columns of ``X`` / ``Y``."""
        # Identity test instead of ``== None``: with a NumPy index array ``==``
        # is element-wise and cannot be used as a condition.
        if self.active_dim is not None:
            X = np.atleast_2d(X)[:, self.active_dim]
            if Y is not None:
                Y = np.atleast_2d(Y)[:, self.active_dim]
        return super().__call__(X, Y, eval_gradient)

def plot_cov(covs, cols, subplot_titles, labels=None):
    """Draw each matrix in ``covs`` as a greyscale heatmap on a subplot grid.

    Parameters
    ----------
    covs : sequence of 2-D array-like
        Matrices to plot, laid out row by row.
    cols : int
        Number of subplot columns.
    subplot_titles : sequence of str or falsy
        Passed through to ``make_subplots``.
    labels : sequence or None
        Tick labels used for both axes of every heatmap.

    Returns
    -------
    plotly figure containing one heatmap per matrix.
    """
    # Ceiling division: exactly enough rows for len(covs) panels. The previous
    # ``int(len/cols) + 1`` added an empty row whenever len(covs) was a
    # multiple of cols.
    n_rows = max(1, -(-len(covs) // cols))
    fig = make_subplots(rows=n_rows, cols=cols, subplot_titles=subplot_titles)
    height = (1000/cols)*2
    for i, cov in enumerate(covs):
        row, col = divmod(i, cols)
        fig.add_trace(go.Heatmap(z=cov, x=labels, y=labels, colorscale='Greys'), row=row + 1, col=col + 1)
    fig.update_layout(title_text='Cov matrix', height=height)
    return fig

In [None]:
# Load the permeability dataset from the mounted Google Drive and display it.
df = pd.read_csv('/content/drive/My Drive/Colab Notebooks/regression/concrete_strength/permeability/permeability.csv')
df

Unnamed: 0,W/C,Size(mm),A/C,UCS(mpa),Permeability(mm/s)
0,0.25,3.5,2.5,15.3,0.62
1,0.25,3.5,3.0,14.2,0.89
2,0.25,3.5,3.5,13.1,1.42
3,0.25,3.5,4.0,11.8,1.93
4,0.25,3.5,4.5,10.7,2.31
...,...,...,...,...,...
85,0.50,13.0,2.5,10.5,0.95
86,0.50,13.0,3.0,10.0,1.01
87,0.50,13.0,3.5,9.1,1.35
88,0.50,13.0,4.0,7.5,2.10


In [None]:
# Per-column overview: dtype, number of distinct values and count of missing values.
pd.DataFrame({'Dtype': df.dtypes, 'Nunique': df.nunique(), 'Isnull': df.isnull().sum()}, index=df.columns)

Unnamed: 0,Dtype,Nunique,Isnull
W/C,float64,6,0
Size(mm),float64,3,0
A/C,float64,5,0
UCS(mpa),float64,76,0
Permeability(mm/s),float64,83,0


In [None]:
# List the column names available for feature/target selection.
df.columns

Index(['W/C', 'Size(mm)', 'A/C', 'UCS(mpa)', 'Permeability(mm/s)'], dtype='object')

In [None]:
# Target column and columns excluded from the feature set.
out_column = 'UCS(mpa)'
to_remove = ['Permeability(mm/s)']
# Every remaining column is a feature.
features = [c for c in df.columns if (c != out_column) and (c not in to_remove)]
# Object-dtype features with more than two distinct values are treated as categorical,
# features with exactly two distinct values as binary, and the rest as numerical.
categorical = [c for c in features if (df[c].dtype=='object') and (df[c].nunique() > 2)]
binary = [c for c in features if df[c].nunique() == 2]
numerical = [col for col in features if col not in categorical + binary]
# Coerce the numerical features to numeric dtype (pd.to_numeric applied along axis 1, i.e. per row).
df[numerical] = df[numerical].apply(pd.to_numeric,1)
# Correlation heatmap of the numerical features together with the target.
fig = go.Figure(data=go.Heatmap(z=df[numerical+[out_column]].corr(),x=numerical+[out_column],y=numerical+[out_column]))
fig.show()

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the seed is fixed for reproducibility.
X_train, X_test, Y_train, Y_test = train_test_split(df[features], df[out_column], test_size=0.2, random_state=42)
# Work on explicit copies so the scaled columns assigned below go into
# independent frames (avoids pandas' SettingWithCopyWarning).
X_train, X_test = X_train.copy(), X_test.copy()

# Scale the training target to [0, 1]; the test target stays in original units.
# Convert the Series to NumPy before adding an axis: ``Series[:, None]`` is
# deprecated multi-dimensional indexing and raises in pandas >= 2.
# NOTE(review): the 1e-15 offset presumably keeps values strictly positive — confirm.
YScaler = MinMaxScaler(feature_range=(0, 1))
Y_train = YScaler.fit_transform(Y_train.to_numpy()[:, None]).ravel() + 1e-15
Y_test = Y_test.to_numpy().ravel() + 1e-15

# Scale numerical features to [0, 1] using statistics from the training split only.
MS = MinMaxScaler(feature_range=(0, 1))
scaled_train = MS.fit_transform(X_train[numerical])
scaled_test = MS.transform(X_test[numerical])
X_train[numerical], X_test[numerical] = scaled_train, scaled_test

X_train.shape, X_test.shape, Y_train.shape, Y_test.shape


Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version.  Convert to a numpy array before indexing instead.



((72, 3), (18, 3), (72,), (18,))

In [None]:
# Positional indices of the numerical feature columns within X_train.
nidx = [X_train.columns.get_loc(i) for i in numerical] #numerical columns index
# Build flat lower/upper bound lists: the 4-entry pattern [lb, 0, lb, lb] / [ub, 3, ub, ub]
# is repeated once per numerical column, followed by one extra trailing entry.
# NOTE(review): presumably search bounds for an external hyperparameter optimiser — confirm
# which parameter each slot refers to.
lb, ub = 1e-5, 1e3
lb, ub = [lb, 0, lb, lb]*len(nidx) +[lb], [ub, 3, ub, ub]*len(nidx) +[ub]

In [None]:
#UCS
# Build the GP prior covariance from a fixed hyperparameter vector and plot it.
nidx = [X_train.columns.get_loc(i) for i in numerical] #numerical columns index

# Layout consumed by create_kernel: len(nidx) weight variances, then the kernel
# variance, the bias variance and the white-noise level.
hp = [1.07704496e+00, 3.02839347e-01, 7.98982930e-04, 4.21078933e+00,
       9.93747009e-01, 6.47095468e-05]

def create_kernel(hp):
    """Return (order-2 ArcCosine + WhiteKernel, {'poly': the ArcCosine part}) built from ``hp``."""
    poly = ArcCosine(2, hp[len(nidx)], hp[:len(nidx)], hp[len(nidx) + 1])
    kernel = poly + gpk.WhiteKernel(hp[len(nidx)+2])
    kernel_d = {'poly': poly}
    return kernel, kernel_d

kernel, kernel_d = create_kernel(hp)
cov = kernel(X_train)
# Prior over the training inputs: constant mean (mean of Y_train) and per-point
# standard deviation from the diagonal of the covariance.
mu_test, std = np.full(len(X_train), Y_train.mean()), np.sqrt(np.diag(cov))
# Three draws from that prior (unseeded, so they differ between runs).
samples = np.random.multivariate_normal(mu_test.ravel(), cov, 3)

# 2.5% / 97.5% quantiles of the prior; this rebinds the names lb and ub.
lb, ub = norm.ppf(0.025, mu_test, std), norm.ppf(0.975, mu_test, std)
# Left: train/train covariance. Right: train/test cross-covariance.
plot_cov([cov, kernel(X_train, X_test)], 2, '').show()

In [None]:
#PERMEABILITY
# Same steps as the UCS cell with a different hyperparameter vector; running this
# cell rebinds hp, create_kernel, kernel, kernel_d, cov, mu_test, std, samples, lb and ub.
# NOTE(review): out_column is set to 'UCS(mpa)' earlier in the notebook, so Y_train
# here is still the UCS target — confirm which hyperparameter set is meant to be active.
nidx = [X_train.columns.get_loc(i) for i in numerical] #numerical columns index

# Layout consumed by create_kernel: len(nidx) weight variances, then the kernel
# variance, the bias variance and the white-noise level.
hp = [6.14028579e+02, 6.02858018e+01, 1.91530376e+02, 1.00000000e-05,
       1.03302146e+02, 4.91480741e-04]

def create_kernel(hp):
    """Return (order-2 ArcCosine + WhiteKernel, {'poly': the ArcCosine part}) built from ``hp``."""
    poly = ArcCosine(2, hp[len(nidx)], hp[:len(nidx)], hp[len(nidx) + 1])
    kernel = poly + gpk.WhiteKernel(hp[len(nidx)+2])
    kernel_d = {'poly': poly}
    return kernel, kernel_d

kernel, kernel_d = create_kernel(hp)
cov = kernel(X_train)
# Prior over the training inputs: constant mean (mean of Y_train) and per-point
# standard deviation from the diagonal of the covariance.
mu_test, std = np.full(len(X_train), Y_train.mean()), np.sqrt(np.diag(cov))
# Three draws from that prior (unseeded, so they differ between runs).
samples = np.random.multivariate_normal(mu_test.ravel(), cov, 3)

# 2.5% / 97.5% quantiles of the prior; this rebinds the names lb and ub.
lb, ub = norm.ppf(0.025, mu_test, std), norm.ppf(0.975, mu_test, std)
# Left: train/train covariance. Right: train/test cross-covariance.
plot_cov([cov, kernel(X_train, X_test)], 2, '').show()

In [None]:
# Fit one GP per component kernel in kernel_d (hyperparameters held fixed via
# optimizer=None) and collect its test-set mean prediction.
mus = []
for i in kernel_d:
    k = kernel_d[i]
    gpr = GPR(kernel=k, optimizer=None, alpha=1e-5).fit(X_train, Y_train)
    mus.append(gpr.predict(X_test, return_std=False))
# Map the stacked predictions (one row per component) back through the target scaler.
mus = YScaler.inverse_transform(np.stack(mus))
# Key each row of predictions by its component name.
mus = dict(zip(kernel_d.keys(), mus))
kernel_d

{'Cement (component 1)(kg in a m^3 mixture)': ArcCosine(variance=4.91, weight_variances=0.912, bias_variance=0.912) + WhiteKernel(noise_level=0.000118),
 'Blast Furnace Slag (component 2)(kg in a m^3 mixture)': ArcCosine(variance=0.0169, weight_variances=47.2, bias_variance=47.2) + WhiteKernel(noise_level=0.000162),
 'Fly Ash (component 3)(kg in a m^3 mixture)': ArcCosine(variance=0.0261, weight_variances=7.93, bias_variance=7.93) + WhiteKernel(noise_level=1.24e-05),
 'Water  (component 4)(kg in a m^3 mixture)': ArcCosine(variance=12.9, weight_variances=798, bias_variance=798) + WhiteKernel(noise_level=0.00511),
 'Superplasticizer (component 5)(kg in a m^3 mixture)': ArcCosine(variance=687, weight_variances=0.57, bias_variance=0.57) + WhiteKernel(noise_level=5.81e-05),
 'Coarse Aggregate  (component 6)(kg in a m^3 mixture)': ArcCosine(variance=0.281, weight_variances=15.9, bias_variance=15.9) + WhiteKernel(noise_level=1.47e-05),
 'Fine Aggregate (component 7)(kg in a m^3 mixture)': Arc

In [None]:
# Fit the GP with the fixed kernel (no hyperparameter optimisation) on the scaled target.
gpr = GPR(kernel=kernel, optimizer=None).fit(X_train, Y_train)
mu_train = gpr.predict(X_train)
mu_test, std_test = gpr.predict(X_test, return_std=True)
# 95% predictive interval on the scaled target.
lb, ub = norm.ppf(0.025, mu_test, std_test), norm.ppf(0.975, mu_test, std_test)

# Bring predictions, interval bounds and the training target back to original units.
# NOTE: Y_train is overwritten with its unscaled values, so re-running this cell
# without re-running the split/scale cell applies the inverse transform twice.
pred = YScaler.inverse_transform(np.stack((mu_test, lb, ub)).T)
Y_train = YScaler.inverse_transform(Y_train[:,np.newaxis]).ravel()
mu_train = YScaler.inverse_transform(mu_train[:,np.newaxis]).ravel()
mu_test = YScaler.inverse_transform(mu_test[:,np.newaxis]).ravel()


def _regression_scores(y_true, y_pred):
    """Return R2, RMSE and MAE for one split as a dict."""
    # The root is taken explicitly: the ``squared=False`` flag of
    # mean_squared_error is deprecated/removed in recent scikit-learn. The
    # value is a root-mean-squared error, so it is labelled 'RMSE'.
    return {'R2': r2_score(y_true, y_pred),
            'RMSE': float(np.sqrt(mean_squared_error(y_true, y_pred))),
            'MAE': mean_absolute_error(y_true, y_pred)}


eval_df = pd.DataFrame([_regression_scores(Y_train, mu_train), _regression_scores(Y_test, mu_test)],
                       index=['TRAIN', 'TEST'])
eval_df

Unnamed: 0,R2,MSE,MAE
TRAIN,0.99936,0.162519,0.11482
TEST,0.999155,0.203979,0.172732


In [None]:
# Test-set targets next to the predicted mean and the lower/upper interval bounds
# (columns 0, 1, 2 of pred, already converted back to original units).
pd.DataFrame({'True': Y_test, 'Pred': pred[:,0], 'LB': pred[:,1], 'UB': pred[:,2]})

Unnamed: 0,True,Pred,LB,UB
0,27.1,27.286886,26.817633,27.756138
1,8.5,8.320778,7.891326,8.750231
2,20.1,20.155515,19.63858,20.67245
3,17.1,17.296205,16.818587,17.773823
4,15.3,15.368326,14.852376,15.884276
5,8.1,8.015106,7.558239,8.471974
6,25.6,25.330066,24.858263,25.801869
7,20.1,20.342138,19.859371,20.824906
8,14.4,14.480999,13.994955,14.967043
9,22.8,22.650635,22.182474,23.118795


In [None]:
from sklearn.inspection import permutation_importance

# Permutation importance of each feature on the test split, scored with R2.
# Y_test is in original units, so it is passed through YScaler to match the
# scaled target the GP was fitted on.
result = permutation_importance(gpr, X_test, YScaler.transform(Y_test[:,np.newaxis]).ravel(), n_repeats=10, random_state=42, n_jobs=2, scoring='r2')
# One box per feature showing the spread of importances over the 10 repeats.
fig = go.Figure()
for i in range(X_test.shape[1]):
    fig.add_trace(go.Box(x=result['importances'][i], name=X_test.columns[i]))

fig.show()

In [None]:
import numpy as np
import pandas as pd
import dask
from scipy.stats import lognorm, norm
from sklearn.model_selection import train_test_split
from numpy.linalg import LinAlgError
from scipy.spatial.distance import pdist, cdist, squareform
from sklearn.gaussian_process import GaussianProcessRegressor as GPR
from sklearn.gaussian_process import kernels as gpk
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
import umap
from sklearn.preprocessing import MinMaxScaler, LabelEncoder, OneHotEncoder
import warnings
import traceback
import optimization.numpy_version.single_objective.continuous as co
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)

class Polynomial(gpk.Kernel):
    """Polynomial kernel ``k(x, y) = (sum_d variance_d * x_d * y_d + offset) ** degree``.

    Parameters
    ----------
    variance : float or sequence of float, default=1.0
        Multiplier applied to the inputs before the dot product. A sequence with
        more than one entry makes the kernel anisotropic (one multiplier per column).
    offset : float, default=0.0
        Constant added to the weighted dot product before exponentiation.
    degree : float, default=1.0
        Exponent applied to the shifted dot product.
    active_dim : sequence of int or None, default=None
        Column indices of the input the kernel operates on; ``None`` keeps every column.

    Notes
    -----
    * ``eval_gradient`` is accepted by ``__call__`` but ignored: no gradient is returned.
    * Every ``hyperparameter_*`` property passes the current parameter value in the
      third (``bounds``) slot of ``gpk.Hyperparameter``.
      NOTE(review): confirm this is intended before relying on the kernel's bounds.
    """

    def __init__(self, variance=1.0, offset=0.0, degree=1.0, active_dim=None):
        self.degree = degree
        self.variance = variance
        self.offset = offset
        self.active_dim = active_dim
        if active_dim is not None and self.anisotropic:
            assert len(self.active_dim) == len(self.variance), 'variance and active_dim must have the same length'

    @property
    def anisotropic(self):
        """True when ``variance`` is a sequence holding more than one value."""
        return np.iterable(self.variance) and len(self.variance) > 1

    # The degree and offset descriptors used to share the single property name
    # ``hyperparameter_periodicity``; the second definition shadowed the first, so
    # "degree" was never exposed. Each property is now named after its parameter.
    @property
    def hyperparameter_degree(self):
        """Hyperparameter descriptor for ``degree``."""
        return gpk.Hyperparameter("degree", "numeric", self.degree)

    @property
    def hyperparameter_offset(self):
        """Hyperparameter descriptor for ``offset``."""
        return gpk.Hyperparameter("offset", "numeric", self.offset)

    @property
    def hyperparameter_variance(self):
        """Hyperparameter descriptor for ``variance`` (vector-valued when anisotropic)."""
        if self.anisotropic:
            return gpk.Hyperparameter("variance", "numeric", self.variance, len(self.variance))
        return gpk.Hyperparameter("variance", "numeric", self.variance)

    def __call__(self, X, Y=None, eval_gradient=False):
        """Return the kernel matrix between the rows of ``X`` and ``Y`` (``X`` itself if ``Y`` is None)."""
        X = np.atleast_2d(X)
        X = X[:, self.active_dim] if self.active_dim is not None else X
        if Y is None:
            return (np.matmul(X * self.variance, X.T) + self.offset) ** self.degree
        else:
            Y = np.atleast_2d(Y)
            Y = Y[:, self.active_dim] if self.active_dim is not None else Y
            return (np.tensordot(X * self.variance, Y, [[-1], [-1]]) + self.offset) ** self.degree

    def diag(self, X):
        """Return the diagonal of ``self(X)``."""
        return np.diag(self(X))

    def is_stationary(self):
        """Returns whether the kernel is stationary. """
        return False

    def __repr__(self):
        if self.anisotropic:
            return "{0}(variance=[{1}], offset={2:.3g}, degree={3:.3g})".format(
                self.__class__.__name__, ", ".join(map("{0:.3g}".format, self.variance)), self.offset, self.degree)
        else:  # isotropic
            return "{0}(variance={1:.3g}, offset={2:.3g}, degree={3:.3g})".format(
                self.__class__.__name__, self.variance, self.offset, self.degree)


class Brownian(gpk.Kernel):
    """Brownian-motion kernel for a single input column.

    ``k(x, y) = variance * min(|x|, |y|)`` when ``x`` and ``y`` have the same sign,
    and ``0`` otherwise.

    Parameters
    ----------
    variance : float, default=1.0
        Multiplier of the kernel value.
    active_dim : sequence of int or None, default=None
        The single column index the kernel operates on. ``None`` uses the input
        as given, which must then have exactly one column.

    Raises
    ------
    ValueError
        If ``active_dim`` does not select exactly one column, or (at call time) if
        the selected inputs do not have exactly one column.

    Notes
    -----
    ``eval_gradient`` is accepted by ``__call__`` but ignored: no gradient is returned.
    """

    def __init__(self, variance=1.0, active_dim=None):
        # The default active_dim=None used to crash on len(None); it is now accepted
        # and the one-column requirement is enforced when the kernel is evaluated.
        if active_dim is not None and len(active_dim) != 1:
            raise ValueError("Input dimensional for Brownian kernel must be 1.")
        self.variance = variance
        self.active_dim = active_dim

    @property
    def hyperparameter_variance(self):
        """Hyperparameter descriptor for ``variance``."""
        return gpk.Hyperparameter("variance", "numeric", self.variance)

    def __call__(self, X, Y=None, eval_gradient=False):
        """Return the kernel matrix between the rows of ``X`` and ``Y`` (``X`` itself if ``Y`` is None)."""
        X = np.atleast_2d(X)
        X = X[:, self.active_dim] if self.active_dim is not None else X
        if Y is None:
            Y = X
        else:
            Y = np.atleast_2d(Y)
            Y = Y[:, self.active_dim] if self.active_dim is not None else Y

        # The (n, 1) against (1, m) broadcast below is only meaningful for one column.
        if X.shape[1] != 1 or Y.shape[1] != 1:
            raise ValueError("Input dimensional for Brownian kernel must be 1.")

        return np.where(np.sign(X) == np.sign(Y.T), self.variance * np.fmin(np.abs(X), np.abs(Y.T)), 0.)

    def diag(self, X):
        """Return the diagonal of ``self(X)``."""
        return np.diag(self(X))

    def is_stationary(self):
        """Returns whether the kernel is stationary. """
        return False

    def __repr__(self):
        return "{0}(variance={1:.3g})".format(self.__class__.__name__, self.variance)


class PiecewisePolynomial(gpk.Kernel):
    """Compactly supported piecewise-polynomial kernel on the scaled cityblock distance.

    With ``r`` the cityblock distance between inputs divided by ``length_scale``,
    the kernel is ``max(0, 1 - r) ** (j + q) * poly_q(r)`` where
    ``j = floor(D / 2) + q + 1`` and ``D`` is the number of active columns.
    It is exactly zero once ``r >= 1``.

    Parameters
    ----------
    length_scale : float or sequence of float
        Divisor applied to the inputs; a sequence with more than one entry makes
        the kernel anisotropic.
    q : float, default=0
        Smoothness order. It is rounded to the nearest integer when the kernel is
        evaluated; only 0, 1, 2 and 3 are implemented.
    active_dim : sequence of int or None, default=None
        Column indices of the input the kernel operates on; ``None`` keeps every column.

    Notes
    -----
    ``eval_gradient`` is accepted by ``__call__`` but ignored: no gradient is returned.
    """

    def __init__(self, length_scale, q=0, active_dim=None):
        self.q = q
        self.length_scale = length_scale
        self.active_dim = active_dim
        if active_dim is not None and self.anisotropic:
            assert len(self.active_dim) == len(
                self.length_scale), 'length_scale and active_dim must have the same length'

    @property
    def anisotropic(self):
        """True when ``length_scale`` is a sequence holding more than one value."""
        return np.iterable(self.length_scale) and len(self.length_scale) > 1

    @property
    def hyperparameter_q(self):
        """Hyperparameter descriptor for ``q``."""
        return gpk.Hyperparameter("q", "numeric", self.q)

    @property
    def hyperparameter_length_scale(self):
        """Hyperparameter descriptor for ``length_scale`` (vector-valued when anisotropic)."""
        if self.anisotropic:
            return gpk.Hyperparameter("length_scale", "numeric", self.length_scale, len(self.length_scale))
        return gpk.Hyperparameter("length_scale", "numeric", self.length_scale)

    def fmax(self, r, j, q):
        """Return the compact-support factor ``max(0, 1 - r) ** (j + q)``."""
        return np.power(np.maximum(0.0, 1 - r), j + q)

    def get_cov(self, r, j, q):
        """Return the order-``q`` polynomial in ``r``.

        Raises
        ------
        ValueError
            If ``q`` is not one of 0, 1, 2, 3.
        """
        if q == 0:
            return 1
        elif q == 1:
            return (j + 1) * r + 1
        elif q == 2:
            return 1 + (j + 2) * r + ((j ** 2 + 4 * j + 3) / 3.0) * r ** 2
        elif q == 3:
            return (
                    1
                    + (j + 3) * r
                    + ((6 * j ** 2 + 36 * j + 45) / 15.0) * r ** 2
                    + ((j ** 3 + 9 * j ** 2 + 23 * j + 15) / 15.0) * r ** 3
            )
        raise ValueError("Requested kernel q is not implemented.")

    def __call__(self, X, Y=None, eval_gradient=False):
        """Return the kernel matrix between the rows of ``X`` and ``Y`` (``X`` itself if ``Y`` is None)."""
        # q may arrive as a float that is only approximately an integer; snap it once
        # and use the snapped value everywhere below.
        q = int(np.round(self.q))
        X = np.atleast_2d(X)
        X = X[:, self.active_dim] if self.active_dim is not None else X
        if Y is None:
            r = squareform(pdist(X / self.length_scale, metric="cityblock"))
        else:
            Y = np.atleast_2d(Y)
            Y = Y[:, self.active_dim] if self.active_dim is not None else Y
            r = cdist(X / self.length_scale, Y / self.length_scale, metric="cityblock")
        j = np.floor(X.shape[1] / 2.0) + q + 1
        # The support factor previously received the unrounded self.q while the
        # polynomial received the rounded q; both now share the same order.
        return self.fmax(r, j, q) * self.get_cov(r, j, q)

    def diag(self, X):
        """Return the diagonal of ``self(X)``."""
        return np.diag(self(X))

    def is_stationary(self):
        """Returns whether the kernel is stationary. """
        return True

    def __repr__(self):
        if self.anisotropic:
            return "{0}(q={1:.3g}, length_scale=[{2}])".format(
                self.__class__.__name__, self.q, ", ".join(map("{0:.3g}".format, self.length_scale)))
        else:  # isotropic
            return "{0}(q={1:.3g}, length_scale={2:.3g})".format(
                self.__class__.__name__, self.q, self.length_scale)


class ArcCosine(gpk.Kernel):
    """Arc-cosine kernel of order 0, 1 or 2.

    Parameters
    ----------
    order : int, default=0
        Kernel order; must be one of ``implemented_orders``.
    variance : float, default=1.0
        Overall multiplier of the kernel value.
    weight_variances : float or sequence of float, default=1.0
        Per-column weights of the inner product; a sequence with more than one
        entry makes the kernel anisotropic.
    bias_variance : float, default=1.0
        Constant added to every weighted inner product.
    active_dim : sequence of int or None, default=None
        Column indices of the input the kernel operates on; ``None`` keeps every column.

    Raises
    ------
    ValueError
        If ``order`` is not implemented.

    Notes
    -----
    ``eval_gradient`` is accepted by ``__call__`` but ignored: no gradient is returned.
    """
    implemented_orders = {0, 1, 2}

    def __init__(self, order=0, variance=1.0, weight_variances=1.0, bias_variance=1.0, active_dim=None):
        if order not in self.implemented_orders:
            raise ValueError("Requested kernel order is not implemented.")
        self.order = order
        self.variance = variance
        self.bias_variance = bias_variance
        self.weight_variances = weight_variances
        self.active_dim = active_dim
        if active_dim is not None and self.anisotropic:
            assert len(self.active_dim) == len(
                self.weight_variances), 'weight_variances and active_dim must have the same length'

    @property
    def anisotropic(self):
        """True when ``weight_variances`` is a sequence holding more than one value."""
        return np.iterable(self.weight_variances) and len(self.weight_variances) > 1

    @property
    def hyperparameter_variance(self):
        """Hyperparameter descriptor for ``variance``."""
        return gpk.Hyperparameter("variance", "numeric", self.variance)

    @property
    def hyperparameter_weight_variances(self):
        """Hyperparameter descriptor for ``weight_variances`` (vector-valued when anisotropic)."""
        if self.anisotropic:
            return gpk.Hyperparameter("weight_variances", "numeric", self.weight_variances, len(self.weight_variances))
        return gpk.Hyperparameter("weight_variances", "numeric", self.weight_variances)

    @property
    def hyperparameter_bias_variance(self):
        """Hyperparameter descriptor for ``bias_variance``."""
        return gpk.Hyperparameter("bias_variance", "numeric", self.bias_variance)

    def _weighted_product(self, X, X2=None):
        """Weighted inner product plus bias: per-row self product if ``X2`` is None, else the full cross matrix."""
        if X2 is None:
            return np.sum(self.weight_variances * X ** 2, axis=1) + self.bias_variance
        return np.matmul((self.weight_variances * X), X2.T) + self.bias_variance

    def _J(self, theta):
        """
        Implements the order dependent family of functions defined in equations
        4 to 7 in the reference paper.
        """
        if self.order == 0:
            return np.pi - theta
        elif self.order == 1:
            return np.sin(theta) + (np.pi - theta) * np.cos(theta)
        elif self.order == 2:
            return 3.0 * np.sin(theta) * np.cos(theta) + (np.pi - theta) * (
                    1.0 + 2.0 * np.cos(theta) ** 2)

    def __call__(self, X, Y=None, eval_gradient=False):
        """Return the kernel matrix between the rows of ``X`` and ``Y`` (``X`` itself if ``Y`` is None)."""
        X = np.atleast_2d(X)
        X = X[:, self.active_dim] if self.active_dim is not None else X
        X_denominator = np.sqrt(self._weighted_product(X))
        if Y is None:
            Y = X
            Y_denominator = X_denominator
        else:
            Y = np.atleast_2d(Y)
            Y = Y[:, self.active_dim] if self.active_dim is not None else Y
            Y_denominator = np.sqrt(self._weighted_product(Y))

        numerator = self._weighted_product(X, Y)
        cos_theta = numerator / X_denominator[:, None] / Y_denominator[None, :]
        # Pull the cosine slightly inside (-1, 1) before taking the arccos.
        jitter = 1e-15
        theta = np.arccos(jitter + (1 - 2 * jitter) * cos_theta)

        return (
            self.variance * (1.0 / np.pi) * self._J(theta)
            * X_denominator[:, None] ** self.order
            * Y_denominator[None, :] ** self.order
        )

    def diag(self, X):
        """Return the diagonal of ``self(X)``."""
        return np.diag(self(X))

    def is_stationary(self):
        """Returns whether the kernel is stationary. """
        return False

    def __repr__(self):
        if self.anisotropic:
            return "{0}(variance={1:.3g}, weight_variances=[{2}], bias_variance={3:.3g})".format(
                self.__class__.__name__, self.variance, ", ".join(map("{0:.3g}".format, self.weight_variances)),
                self.bias_variance)
        else:  # isotropic
            # bias_variance is format argument 3; index 2 would repeat weight_variances.
            return "{0}(variance={1:.3g}, weight_variances={2:.3g}, bias_variance={3:.3g})".format(
                self.__class__.__name__, self.variance, self.weight_variances, self.bias_variance)


class ConstantKernel(gpk.ConstantKernel):
    """``gpk.ConstantKernel`` that first restricts its inputs to selected columns.

    Parameters
    ----------
    constant_value, constant_value_bounds
        Forwarded unchanged to ``gpk.ConstantKernel``.
    active_dim : sequence of int or None, default=None
        Column indices kept before the parent kernel is evaluated; ``None`` keeps
        every column.
    """

    def __init__(self, constant_value=1.0, constant_value_bounds=(1e-5, 1e5), active_dim=None):
        super().__init__(constant_value=constant_value, constant_value_bounds=constant_value_bounds)
        self.active_dim = active_dim

    def __call__(self, X, Y=None, eval_gradient=False):
        """Evaluate the parent kernel on the active columns of ``X`` (and ``Y`` when given)."""
        # Identity test rather than `== None`: an ndarray active_dim would compare
        # elementwise and make the `if` raise.
        if self.active_dim is not None:
            X = np.atleast_2d(X)[:, self.active_dim]
            if Y is not None:
                Y = np.atleast_2d(Y)[:, self.active_dim]
        return super().__call__(X, Y, eval_gradient)


class Matern(gpk.Matern):
    """``gpk.Matern`` that first restricts its inputs to selected columns.

    Parameters
    ----------
    length_scale, length_scale_bounds, nu
        Forwarded unchanged to ``gpk.Matern``.
    active_dim : sequence of int or None, default=None
        Column indices kept before the parent kernel is evaluated; ``None`` keeps
        every column. With an anisotropic ``length_scale`` it must have one entry
        per length scale.
    """

    def __init__(self, length_scale=1.0, length_scale_bounds=(1e-5, 1e5), nu=1.5, active_dim=None):
        super().__init__(length_scale=length_scale, length_scale_bounds=length_scale_bounds, nu=nu)
        self.active_dim = active_dim
        if active_dim is not None and self.anisotropic:
            # The message used to name 'weight_variances', a parameter this kernel lacks.
            assert len(self.active_dim) == len(
                self.length_scale), 'length_scale and active_dim must have the same length'

    @property
    def anisotropic(self):
        """True when ``length_scale`` is a sequence holding more than one value."""
        return np.iterable(self.length_scale) and len(self.length_scale) > 1

    def __call__(self, X, Y=None, eval_gradient=False):
        """Evaluate the parent kernel on the active columns of ``X`` (and ``Y`` when given)."""
        # Identity test rather than `== None`: an ndarray active_dim would compare
        # elementwise and make the `if` raise.
        if self.active_dim is not None:
            X = np.atleast_2d(X)[:, self.active_dim]
            if Y is not None:
                Y = np.atleast_2d(Y)[:, self.active_dim]
        return super().__call__(X, Y, eval_gradient)


class RationalQuadratic(gpk.RationalQuadratic):
    """``gpk.RationalQuadratic`` that first restricts its inputs to selected columns.

    Parameters
    ----------
    length_scale, alpha, length_scale_bounds, alpha_bounds
        Forwarded unchanged to ``gpk.RationalQuadratic``.
    active_dim : sequence of int or None, default=None
        Column indices kept before the parent kernel is evaluated; ``None`` keeps
        every column. With a multi-valued ``length_scale`` it must have one entry
        per length scale.
    """

    def __init__(self, length_scale=1.0, alpha=1.0, length_scale_bounds=(1e-05, 100000.0),
                 alpha_bounds=(1e-05, 100000.0),
                 active_dim=None):
        super().__init__(length_scale=length_scale, length_scale_bounds=length_scale_bounds, alpha=alpha,
                         alpha_bounds=alpha_bounds)
        self.active_dim = active_dim
        if active_dim is not None and self.anisotropic:
            # The message used to name 'weight_variances', a parameter this kernel lacks.
            assert len(self.active_dim) == len(
                self.length_scale), 'length_scale and active_dim must have the same length'

    @property
    def anisotropic(self):
        """True when ``length_scale`` is a sequence holding more than one value."""
        return np.iterable(self.length_scale) and len(self.length_scale) > 1

    def __call__(self, X, Y=None, eval_gradient=False):
        """Evaluate the parent kernel on the active columns of ``X`` (and ``Y`` when given)."""
        # Identity test rather than `== None`: an ndarray active_dim would compare
        # elementwise and make the `if` raise.
        if self.active_dim is not None:
            X = np.atleast_2d(X)[:, self.active_dim]
            if Y is not None:
                Y = np.atleast_2d(Y)[:, self.active_dim]
        return super().__call__(X, Y, eval_gradient)


class RBF(gpk.RBF):
    """``gpk.RBF`` that first restricts its inputs to selected columns.

    Parameters
    ----------
    length_scale, length_scale_bounds
        Forwarded unchanged to ``gpk.RBF``.
    active_dim : sequence of int or None, default=None
        Column indices kept before the parent kernel is evaluated; ``None`` keeps
        every column. With an anisotropic ``length_scale`` it must have one entry
        per length scale.
    """

    def __init__(self, length_scale=1.0, length_scale_bounds=(1e-5, 1e5), active_dim=None):
        super().__init__(length_scale=length_scale, length_scale_bounds=length_scale_bounds)
        self.active_dim = active_dim
        if active_dim is not None and self.anisotropic:
            # The message used to name 'weight_variances', a parameter this kernel lacks.
            assert len(self.active_dim) == len(
                self.length_scale), 'length_scale and active_dim must have the same length'

    @property
    def anisotropic(self):
        """True when ``length_scale`` is a sequence holding more than one value."""
        return np.iterable(self.length_scale) and len(self.length_scale) > 1

    def __call__(self, X, Y=None, eval_gradient=False):
        """Evaluate the parent kernel on the active columns of ``X`` (and ``Y`` when given)."""
        # Identity test rather than `== None`: an ndarray active_dim would compare
        # elementwise and make the `if` raise.
        if self.active_dim is not None:
            X = np.atleast_2d(X)[:, self.active_dim]
            if Y is not None:
                Y = np.atleast_2d(Y)[:, self.active_dim]
        return super().__call__(X, Y, eval_gradient)


class ExpSineSquared(gpk.ExpSineSquared):
    """``gpk.ExpSineSquared`` that first restricts its inputs to selected columns.

    Parameters
    ----------
    length_scale, periodicity, length_scale_bounds, periodicity_bounds
        Forwarded unchanged to ``gpk.ExpSineSquared``.
    active_dim : sequence of int or None, default=None
        Column indices kept before the parent kernel is evaluated; ``None`` keeps
        every column. With a multi-valued ``length_scale`` it must have one entry
        per length scale.
    """

    def __init__(self, length_scale=1.0, periodicity=1.0, length_scale_bounds=(1e-5, 1e5),
                 periodicity_bounds=(1e-5, 1e5), active_dim=None):
        super().__init__(length_scale=length_scale, periodicity=periodicity, length_scale_bounds=length_scale_bounds,
                         periodicity_bounds=periodicity_bounds)
        self.active_dim = active_dim
        if active_dim is not None and self.anisotropic:
            # The message used to name 'weight_variances', a parameter this kernel lacks.
            assert len(self.active_dim) == len(
                self.length_scale), 'length_scale and active_dim must have the same length'

    @property
    def anisotropic(self):
        """True when ``length_scale`` is a sequence holding more than one value."""
        return np.iterable(self.length_scale) and len(self.length_scale) > 1

    def __call__(self, X, Y=None, eval_gradient=False):
        """Evaluate the parent kernel on the active columns of ``X`` (and ``Y`` when given)."""
        # Identity test rather than `== None`: an ndarray active_dim would compare
        # elementwise and make the `if` raise.
        if self.active_dim is not None:
            X = np.atleast_2d(X)[:, self.active_dim]
            if Y is not None:
                Y = np.atleast_2d(Y)[:, self.active_dim]
        return super().__call__(X, Y, eval_gradient)


class WhiteKernel(gpk.WhiteKernel):
    """``gpk.WhiteKernel`` that first restricts its inputs to selected columns.

    Parameters
    ----------
    noise_level, noise_level_bounds
        Forwarded unchanged to ``gpk.WhiteKernel``.
    active_dim : sequence of int or None, default=None
        Column indices kept before the parent kernel is evaluated; ``None`` keeps
        every column.
    """

    def __init__(self, noise_level=1.0, noise_level_bounds=(1e-05, 100000.0), active_dim=None):
        super().__init__(noise_level=noise_level, noise_level_bounds=noise_level_bounds)
        self.active_dim = active_dim

    def __call__(self, X, Y=None, eval_gradient=False):
        """Evaluate the parent kernel on the active columns of ``X`` (and ``Y`` when given)."""
        # Identity test rather than `== None`: an ndarray active_dim would compare
        # elementwise and make the `if` raise.
        if self.active_dim is not None:
            X = np.atleast_2d(X)[:, self.active_dim]
            if Y is not None:
                Y = np.atleast_2d(Y)[:, self.active_dim]
        return super().__call__(X, Y, eval_gradient)

def shift_df(df, shift):
    """Prepend lagged copies of every column and drop the rows left incomplete.

    For each lag ``k`` in ``1..shift`` a copy of ``df`` shifted down by ``k`` rows is
    added, its columns renamed to ``"<column>_<k>"``. Column blocks are ordered from
    the largest lag to the smallest, followed by the unshifted columns. Rows that
    contain any missing value are removed from the result.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame; it is not modified.
    shift : int
        Largest lag to generate. Values below 1 add no lagged columns.

    Returns
    -------
    pandas.DataFrame
        The widened frame without incomplete rows.
    """
    def lagged(step):
        frame = df.shift(step)
        return frame.rename(columns={col: str(col) + '_' + str(step) for col in frame.columns})

    pieces = [lagged(step) for step in range(shift, 0, -1)]
    if not pieces:
        # Nothing to lag: only the incomplete rows of the input are dropped.
        return df.dropna()
    pieces.append(df)
    return pd.concat(pieces, axis=1).dropna()

def _create_kernel(hp):
    """Build an additive kernel: one component per numerical feature plus global noise.

    For the ``i``-th entry ``c`` of the module-level ``nidx`` list (column positions
    of the numerical features), ``hp[4*i : 4*i + 4]`` holds, in order, the
    RationalQuadratic length scale, its alpha, the constant multiplier and the
    per-feature white-noise level. ``hp[4 * len(nidx)]`` is the noise level of a
    final ``gpk.WhiteKernel`` that is not restricted to any column.

    Parameters
    ----------
    hp : sequence of float
        Flat hyperparameter vector with at least ``4 * len(nidx) + 1`` entries.

    Returns
    -------
    The summed kernel.
    """
    kernel = None
    for i, c in enumerate(nidx):
        # `i` addresses this feature's slice of hp; `c` is the column the component
        # acts on. The component used to be bound to column `i`, leaving `c` unused.
        # Alternative components sharing the same four-slot layout:
        #hp[i*4+1] = nus[np.argmin(np.abs(nus - hp[i*4+1]))]
        #comp = Matern(hp[i * 4], active_dim=[c], nu=hp[i * 4 + 1]) * ConstantKernel(hp[i * 4 + 2], active_dim=[c]) + WhiteKernel(hp[i * 4 + 3], active_dim=[c])
        #comp = PiecewisePolynomial(hp[i*4], q=hp[i*4+1], active_dim=[c]) * ConstantKernel(hp[i*4+2], active_dim=[c]) + WhiteKernel(hp[i*4+3], active_dim=[c])
        #comp = ArcCosine(2, hp[i*4], hp[i*4+1], hp[i*4+2], active_dim=[c]) + WhiteKernel(hp[i*4+3], active_dim=[c])
        comp = RationalQuadratic(hp[i*4], active_dim=[c], alpha=hp[i*4+1]) * ConstantKernel(hp[i*4+2], active_dim=[c]) + WhiteKernel(hp[i*4+3], active_dim=[c])
        # Start the sum from the first component instead of the integer 0, which
        # scikit-learn would wrap into an extra constant kernel of value 0.
        kernel = comp if kernel is None else kernel + comp
    noise = gpk.WhiteKernel(hp[4*len(nidx)])
    return noise if kernel is None else kernel + noise

def __create_kernel(hp):
    """Build ``Matern * Constant + White`` from a flat hyperparameter vector.

    With ``n = len(nidx)``: ``hp[:n]`` are the Matern length scales, ``hp[n]`` the
    candidate nu, ``hp[n + 1]`` the constant multiplier and ``hp[n + 2]`` the noise
    level. ``hp[n]`` is overwritten in place with the closest value found in the
    module-level ``nus`` array.
    """
    n = len(nidx)
    # Snap nu onto the supported grid and write it back so the caller sees it.
    closest = np.argmin(np.abs(nus - hp[n]))
    hp[n] = nus[closest]
    scaled_matern = Matern(hp[:n], nu=hp[n]) * ConstantKernel(hp[n + 1])
    return scaled_matern + gpk.WhiteKernel(hp[n + 2])

def ___create_kernel(hp):
    """Build ``PiecewisePolynomial * Constant + White`` from a flat hyperparameter vector.

    With ``n = len(nidx)``: ``hp[:n]`` are the length scales, ``hp[n]`` the candidate
    order q, ``hp[n + 1]`` the constant multiplier and ``hp[n + 2]`` the noise level.
    ``hp[n]`` is overwritten in place with the closest value found in the
    module-level ``deg`` array.
    """
    n = len(nidx)
    # Snap q onto the supported grid and write it back so the caller sees it.
    closest = np.argmin(np.abs(deg - hp[n]))
    hp[n] = deg[closest]
    scaled_poly = PiecewisePolynomial(hp[:n], q=hp[n]) * ConstantKernel(hp[n + 1])
    return scaled_poly + gpk.WhiteKernel(hp[n + 2])

def create_kernel(hp):
    """Build an order-2 ``ArcCosine + White`` kernel from a flat hyperparameter vector.

    With ``n = len(nidx)``: ``hp[:n]`` are the per-feature weight variances, ``hp[n]``
    the kernel variance, ``hp[n + 1]`` the bias variance and ``hp[n + 2]`` the noise
    level, i.e. ``n + 3`` entries are read.
    """
    n = len(nidx)
    arc_cosine = ArcCosine(order=2, variance=hp[n], weight_variances=hp[:n], bias_variance=hp[n + 1])
    return arc_cosine + gpk.WhiteKernel(hp[n + 2])

# NOTE(review): absolute, machine-specific path -- the cell only runs where this file exists.
df = pd.read_csv('/home/skyolia/Téléchargements/permeability.csv')
########################################################################################################################
# Column roles: the target, columns excluded from the inputs, and the remaining
# features split into categorical (object dtype, more than two levels), binary
# (exactly two distinct values) and numerical (everything else).
out_column = 'UCS(mpa)'
to_remove = ['Permeability(mm/s)']
features = [c for c in df.columns if (c != out_column) and (c not in to_remove)]
categorical = [c for c in features if (df[c].dtype=='object') and (df[c].nunique() > 2)]
binary = [c for c in features if df[c].nunique() == 2]
numerical = [col for col in features if col not in categorical + binary]
df[numerical] = df[numerical].apply(pd.to_numeric, axis=1)
########################################################################################################################
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(df[features], df[out_column], test_size=0.2, random_state=42)
# Own the split frames so the scaled columns below are written into real copies
# rather than into selections of `df` (avoids SettingWithCopy problems).
X_train, X_test = X_train.copy(), X_test.copy()

# Only the training target is scaled to [0, 1]; the test target stays in its
# original units. Both get a 1e-15 offset.
YScaler = MinMaxScaler(feature_range=(0, 1))
# Index the underlying ndarray: two-dimensional indexing of a Series
# (`Y_train[:, None]`) is not supported by current pandas.
Y_train, Y_test = YScaler.fit_transform(Y_train.values[:, None]).ravel() + 1e-15, Y_test.values.ravel() + 1e-15

# Numerical features are scaled to [0, 1] with statistics from the training split only.
MS = MinMaxScaler(feature_range=(0, 1))
scaled_train = MS.fit_transform(X_train[numerical])
scaled_test = MS.transform(X_test[numerical])
X_train[numerical], X_test[numerical] = scaled_train, scaled_test
print(X_train.shape, X_test.shape, Y_train.shape, Y_test.shape)
########################################################################################################################
def sk_nll_stable(hp):
    """Return ``(hp, negative log marginal likelihood)`` for a GP built from ``hp``.

    The kernel comes from ``create_kernel(hp)`` and the model is fitted on the
    module-level ``X_train`` / ``Y_train`` without hyperparameter optimisation.
    When building or fitting raises ``LinAlgError`` or ``ValueError`` the traceback
    is printed, ``hp`` is replaced by a uniform draw from ``[lb, ub]`` and the
    attempt is repeated until one succeeds. The returned ``hp`` is therefore the
    vector that was actually evaluated, which may differ from the argument.
    """
    while True:
        try:
            model = GPR(kernel=create_kernel(hp), optimizer=None).fit(X_train, Y_train)
            return hp, -1 * model.log_marginal_likelihood_value_
        except (LinAlgError, ValueError):
            traceback.print_exc()
            # Resample the whole vector and try again.
            hp = np.random.uniform(lb, ub, dim)

def mle_objf(pop, dim=None):
    """Score every candidate of ``pop`` with ``sk_nll_stable`` through dask.

    Parameters
    ----------
    pop : iterable
        Candidate hyperparameter vectors.
    dim : optional
        Accepted but not used.

    Returns
    -------
    tuple of numpy.ndarray
        The evaluated vectors (as returned by ``sk_nll_stable``) and their scores,
        in the order of ``pop``.
    """
    candidates, scores = [], []
    for hp in pop:
        # nout=2 lets the lazy result be unpacked into its two outputs.
        evaluated_hp, nll = dask.delayed(sk_nll_stable, nout=2)(hp)
        candidates.append(evaluated_hp)
        scores.append(nll)
    candidates, scores = dask.compute(candidates, scores)
    # Timing note left by the author next to this return: duration = 27.99079418182373
    return np.asarray(candidates), np.asarray(scores)

def ccf(population):
    """Return ``population`` unchanged.

    Passed as the second positional argument to ``co.CPSO`` later in this file.
    NOTE(review): presumably the optimizer's constraint/correction hook, for which
    this is a no-op -- confirm against the CPSO implementation.
    """
    return population

# Grids onto which the alternative kernel factories snap a continuous search
# variable: candidate Matern nu values and piecewise-polynomial orders q.
nus = np.asarray([0.5, 1.5, 2.5])
deg = np.asarray([0, 1, 2, 3])

# Positions, within X_train's columns, of the numerical features.
nidx = [X_train.columns.get_loc(name) for name in numerical]

# Search box for the layout read by create_kernel: len(nidx) per-feature entries
# followed by three scalars, every entry bounded by [1e-5, 1e3].
# Layouts tried for the other factories (lo = 1e-5, hi = 1e3):
#   [lo, 0, lo, lo]*len(nidx) + [lo]      ..  [hi, 3, hi, hi]*len(nidx) + [hi]
#   [lo, lo, lo, lo]*len(nidx) + [lo]     ..  [hi, hi, hi, hi]*len(nidx) + [hi]
#   [lo]*len(nidx) + [0, lo, lo]          ..  [hi]*len(nidx) + [3, hi, hi]
#   [lo]*len(nidx) + [lo, 1, lo]          ..  [hi]*len(nidx) + [hi, 5, hi]
dim = len(nidx) + 3
lb, ub = np.full(dim, 1e-5), np.full(dim, 1e3)

# NOTE(review): 50 and 5000 are presumably the swarm size and the iteration
# budget -- confirm against the CPSO signature.
optimizer = co.CPSO(mle_objf, ccf, dim, lb, ub, 50, 5000)
print(optimizer.gbest)