# Advanced sklearn tutorial
> API details

## Pipeline

Useful API to chain operations on dataset.

In [None]:
from sklearn.datasets import make_classification
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression

# Synthetic classification dataset with 1000 samples, returned as an (X, y) tuple.
# The seed is fixed so that "Restart Kernel -> Run All" regenerates the same data.
data = make_classification(n_samples=1000, random_state=42)

# make_pipeline chains the steps in order: each transformer is fitted and applied
# to the output of the previous one, and the last step is the estimator.
pipe = make_pipeline(
    StandardScaler(),      # standardize features before the projection
    PCA(),                 # no n_components given: library default is used
    LogisticRegression(),  # final classifier
)

# `data` is unpacked into (X, y). The fitted pipeline is the cell's last
# expression, so the notebook displays it as the cell output.
pipe.fit(*data)


## Feature aggregation

In [None]:
from sklearn.datasets import make_classification
from sklearn.pipeline import make_pipeline, make_union
from sklearn.preprocessing import StandardScaler, KBinsDiscretizer
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.neural_network import MLPClassifier

# Synthetic classification dataset with 1000 samples, returned as an (X, y) tuple.
# The seed is fixed so that "Restart Kernel -> Run All" regenerates the same data.
data = make_classification(n_samples=1000, random_state=42)

# make_union applies every listed transformer to the same (scaled) input and
# concatenates their outputs, so the classifier is trained on all of them together.
pipe = make_pipeline(
    StandardScaler(),
    make_union(
        PCA(n_components=2),
        # TruncatedSVD's default solver is randomized; pin the seed for stable output.
        TruncatedSVD(n_components=2, random_state=42),
        KBinsDiscretizer(),
    ),
    # Weight initialisation and batch shuffling are random; pin the seed as well.
    MLPClassifier(random_state=42),
)

# `data` is unpacked into (X, y). The fitted pipeline is the cell's last
# expression, so the notebook displays it as the cell output.
pipe.fit(*data)

## Column Transformer

In [None]:
from sklearn.datasets import make_classification
from sklearn.pipeline import make_pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, KBinsDiscretizer
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPClassifier

# Synthetic classification dataset with 1000 samples, returned as an (X, y) tuple.
# The seed is fixed so that "Restart Kernel -> Run All" regenerates the same data.
# NOTE: the column slices below assume 20 feature columns (the documented default
# of make_classification) - adjust them if n_features is changed.
data = make_classification(n_samples=1000, random_state=42)

# ColumnTransformer routes disjoint column ranges to different transformers and
# concatenates the results; each entry is (name, transformer, columns).
pipe = make_pipeline(
    ColumnTransformer(
        [
            ("scaled", StandardScaler(), slice(0, 10)),            # columns 0-9
            ("disc", KBinsDiscretizer(n_bins=10), slice(10, 12)),  # columns 10-11
            ("rest", "passthrough", slice(12, 20)),                # columns 12-19, unchanged
        ]
    ),
    PCA(n_components=2),
    # Weight initialisation and batch shuffling are random; pin the seed.
    MLPClassifier(random_state=42),
)

# `data` is unpacked into (X, y). The fitted pipeline is the cell's last
# expression, so the notebook displays it as the cell output.
pipe.fit(*data)


## mlflow

[Source](https://www.mlflow.org/docs/latest/tutorial.html)

In [None]:
"""import mlflow

with mlflow.start_run():
        
        # ... Fit model   

        mlflow.log_param("alpha", alpha)
        mlflow.log_param("l1_ratio", l1_ratio)
        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("r2", r2)
        mlflow.log_metric("mae", mae)

        mlflow.sklearn.log_model(lr, "model")"""

'import mlflow\n\nwith mlflow.start_run():\n        \n        # ... Fit model   \n\n        mlflow.log_param("alpha", alpha)\n        mlflow.log_param("l1_ratio", l1_ratio)\n        mlflow.log_metric("rmse", rmse)\n        mlflow.log_metric("r2", r2)\n        mlflow.log_metric("mae", mae)\n\n        mlflow.sklearn.log_model(lr, "model")'