# Example of a PCA-GP-PCA pipeline

In [1]:
from sklearn.datasets import make_regression

from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.gaussian_process import GaussianProcessRegressor

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer, TransformedTargetRegressor

## Generate some synthetic regression data

In [2]:
# Synthetic multi-output regression problem: 100 samples, 10 input features, 11 targets.
# The seed is fixed so that the dataset is identical on every Restart & Run All.
X, y = make_regression(
    n_samples=100,
    n_features=10,
    n_targets=11,
    noise=0.1,
    random_state=0,
)

## PCA-GP-PCA as an sklearn pipeline

### 1. Define the PCA for the shape embedding

In this example we only apply PCA to the first 8 columns

The last two columns are unchanged

In [3]:
# Only columns 0-7 are sent through PCA; every other column of X is
# forwarded unchanged thanks to remainder="passthrough".
feats_to_reduce = [*range(8)]
preprocessor = ColumnTransformer(
    [("pca", PCA(n_components=8), feats_to_reduce)],
    remainder="passthrough",
)
preprocessor

0,1,2
,transformers,"[('pca', ...)]"
,remainder,'passthrough'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,n_components,8
,copy,True
,whiten,False
,svd_solver,'auto'
,tol,0.0
,iterated_power,'auto'
,n_oversamples,10
,power_iteration_normalizer,'auto'
,random_state,


### 2. Define the output scaler for the output fields (MinMaxScaler + PCA)

In [4]:
# Target-side transform: rescale the outputs with MinMaxScaler, then project
# them onto 9 principal components.
postprocessor = Pipeline(
    steps=[("scaler", MinMaxScaler()), ("pca", PCA(n_components=9))]
)
postprocessor

0,1,2
,steps,"[('scaler', ...), ('pca', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,feature_range,"(0, ...)"
,copy,True
,clip,False

0,1,2
,n_components,9
,copy,True
,whiten,False
,svd_solver,'auto'
,tol,0.0
,iterated_power,'auto'
,n_oversamples,10
,power_iteration_normalizer,'auto'
,random_state,


### 3. Define the regressor

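The transformer is applied to the targets $Y$, not to the inputs $X$: the GP is fitted on $\text{postprocessor}(Y)$, and predictions are mapped back with the inverse transform, i.e. $\hat{Y} = \text{postprocessor}^{-1}(\text{GP}(X))$.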

In [5]:
# Wrap the GP so that `postprocessor` is used as the target transformer.
# check_inverse=False disables the transform/inverse-transform consistency check
# (presumably because the truncated PCA is not exactly invertible -- confirm).
# NOTE(review): no kernel is given; per the scikit-learn docs the default kernel
# has fixed hyperparameter bounds, so n_restarts_optimizer=3 may have no effect
# here -- confirm, or pass an explicit kernel.
regressor = TransformedTargetRegressor(
    regressor=GaussianProcessRegressor(n_restarts_optimizer=3),
    transformer=postprocessor,
    check_inverse=False,
)
regressor

0,1,2
,regressor,GaussianProce...s_optimizer=3)
,transformer,Pipeline(step...mponents=9))])
,func,
,inverse_func,
,check_inverse,False

0,1,2
,kernel,
,alpha,1e-10
,optimizer,'fmin_l_bfgs_b'
,n_restarts_optimizer,3
,normalize_y,False
,copy_X_train,True
,n_targets,
,random_state,

0,1,2
,feature_range,"(0, ...)"
,copy,True
,clip,False

0,1,2
,n_components,9
,copy,True
,whiten,False
,svd_solver,'auto'
,tol,0.0
,iterated_power,'auto'
,n_oversamples,10
,power_iteration_normalizer,'auto'
,random_state,


### 4. Combine to make the pipeline

In [6]:
# Full model, in order: column-wise PCA on the inputs -> standardisation ->
# target-transformed GP regressor.
model = Pipeline(
    [
        ("preprocessor", preprocessor),
        ("scaler", StandardScaler()),
        ("regressor", regressor),
    ]
)
model

0,1,2
,steps,"[('preprocessor', ...), ('scaler', ...), ...]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('pca', ...)]"
,remainder,'passthrough'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,n_components,8
,copy,True
,whiten,False
,svd_solver,'auto'
,tol,0.0
,iterated_power,'auto'
,n_oversamples,10
,power_iteration_normalizer,'auto'
,random_state,

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,regressor,GaussianProce...s_optimizer=3)
,transformer,Pipeline(step...mponents=9))])
,func,
,inverse_func,
,check_inverse,False

0,1,2
,kernel,
,alpha,1e-10
,optimizer,'fmin_l_bfgs_b'
,n_restarts_optimizer,3
,normalize_y,False
,copy_X_train,True
,n_targets,
,random_state,

0,1,2
,feature_range,"(0, ...)"
,copy,True
,clip,False

0,1,2
,n_components,9
,copy,True
,whiten,False
,svd_solver,'auto'
,tol,0.0
,iterated_power,'auto'
,n_oversamples,10
,power_iteration_normalizer,'auto'
,random_state,


## Fit the model

In [7]:
# Fit every step of the pipeline on the synthetic data, then display the
# pipeline as the cell output.
model.fit(X, y)
model

0,1,2
,steps,"[('preprocessor', ...), ('scaler', ...), ...]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('pca', ...)]"
,remainder,'passthrough'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,n_components,8
,copy,True
,whiten,False
,svd_solver,'auto'
,tol,0.0
,iterated_power,'auto'
,n_oversamples,10
,power_iteration_normalizer,'auto'
,random_state,

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,regressor,GaussianProce...s_optimizer=3)
,transformer,Pipeline(step...mponents=9))])
,func,
,inverse_func,
,check_inverse,False

0,1,2
,kernel,
,alpha,1e-10
,optimizer,'fmin_l_bfgs_b'
,n_restarts_optimizer,3
,normalize_y,False
,copy_X_train,True
,n_targets,
,random_state,

0,1,2
,feature_range,"(0, ...)"
,copy,True
,clip,False

0,1,2
,n_components,9
,copy,True
,whiten,False
,svd_solver,'auto'
,tol,0.0
,iterated_power,'auto'
,n_oversamples,10
,power_iteration_normalizer,'auto'
,random_state,


## Predict on the training data

In [8]:
# Predict on the same inputs that were used for fitting.
y_pred = model.predict(X)
# Sanity check: after the inverse target transform the predictions should have
# the same shape as the original targets.
assert y_pred.shape == y.shape, f"unexpected prediction shape {y_pred.shape}"

## Another way to define the pipeline

### 1. Define the regressor

In [9]:
# Alternative layout: the bare GP is the last step of an input-side pipeline;
# the target transform is wrapped around it in the next cell.
# Note: this rebinds the name `regressor` defined earlier in the notebook.
regressor = Pipeline(
    [
        ("preprocessor", preprocessor),
        ("scaler", StandardScaler()),
        ("regressor", GaussianProcessRegressor(n_restarts_optimizer=3)),
    ]
)
regressor

0,1,2
,steps,"[('preprocessor', ...), ('scaler', ...), ...]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('pca', ...)]"
,remainder,'passthrough'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,n_components,8
,copy,True
,whiten,False
,svd_solver,'auto'
,tol,0.0
,iterated_power,'auto'
,n_oversamples,10
,power_iteration_normalizer,'auto'
,random_state,

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,kernel,
,alpha,1e-10
,optimizer,'fmin_l_bfgs_b'
,n_restarts_optimizer,3
,normalize_y,False
,copy_X_train,True
,n_targets,
,random_state,


### 2. Combine to make the pipeline

In [10]:
# Wrap the whole input-side pipeline with the target transformer
# (`postprocessor`). This rebinds the name `model` from the first approach.
model = TransformedTargetRegressor(
    regressor=regressor,
    transformer=postprocessor,
    check_inverse=False,
)
model

0,1,2
,regressor,Pipeline(step...ptimizer=3))])
,transformer,Pipeline(step...mponents=9))])
,func,
,inverse_func,
,check_inverse,False

0,1,2
,transformers,"[('pca', ...)]"
,remainder,'passthrough'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,n_components,8
,copy,True
,whiten,False
,svd_solver,'auto'
,tol,0.0
,iterated_power,'auto'
,n_oversamples,10
,power_iteration_normalizer,'auto'
,random_state,

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,kernel,
,alpha,1e-10
,optimizer,'fmin_l_bfgs_b'
,n_restarts_optimizer,3
,normalize_y,False
,copy_X_train,True
,n_targets,
,random_state,

0,1,2
,feature_range,"(0, ...)"
,copy,True
,clip,False

0,1,2
,n_components,9
,copy,True
,whiten,False
,svd_solver,'auto'
,tol,0.0
,iterated_power,'auto'
,n_oversamples,10
,power_iteration_normalizer,'auto'
,random_state,


In [11]:
# Fit the alternative model on the same data; the value returned by fit() is
# shown as the cell output.
model.fit(X, y)

0,1,2
,regressor,Pipeline(step...ptimizer=3))])
,transformer,Pipeline(step...mponents=9))])
,func,
,inverse_func,
,check_inverse,False

0,1,2
,transformers,"[('pca', ...)]"
,remainder,'passthrough'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,n_components,8
,copy,True
,whiten,False
,svd_solver,'auto'
,tol,0.0
,iterated_power,'auto'
,n_oversamples,10
,power_iteration_normalizer,'auto'
,random_state,

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,kernel,
,alpha,1e-10
,optimizer,'fmin_l_bfgs_b'
,n_restarts_optimizer,3
,normalize_y,False
,copy_X_train,True
,n_targets,
,random_state,

0,1,2
,feature_range,"(0, ...)"
,copy,True
,clip,False

0,1,2
,n_components,9
,copy,True
,whiten,False
,svd_solver,'auto'
,tol,0.0
,iterated_power,'auto'
,n_oversamples,10
,power_iteration_normalizer,'auto'
,random_state,
