### Pipeline outside of ML

This notebook works through some trial and error toward creating a `Pipeline` that can be used with `xarray_filters`.

This is a continuation of goals in [Elm issue #149](https://github.com/ContinuumIO/elm/issues/149) to separate ML from GIS utils.

The goal is to be able to run something like this:
```
from xarray_filters.pipeline import Pipeline
from xarray_filters.steps import Generic, Serialize
def step_1(dset, **kw):
    return kw['a'] * dset.mean(dim=('x', 'y')) ** kw['b']

def step_2(dset, **kw):
    return kw['a'] + dset * kw['b']
    
steps = (('s1', Generic(step_1)),
         ('s2', Generic(step_2)),
         ('s3', Serialize('two_step_pipeline_out.nc')))
pipe = Pipeline(steps=steps)
pipe.set_params(s1__a=2,
                s1__b=3,
                s2__a=0,
                s2__b=0,
                s3__fname='file_with_zeros.nc')
pipe.fit_transform(X)
```
 * The example above uses the scikit-learn `set_params` style of setting parameters, where:
   * Steps in the `Pipeline` are named, `s1`, `s2`, and `s3` in this case
   * Double underscore notation is used to pass parameters to the `set_params` method of a given step.  Here:
     * `a` and `b` are parameters accepted by `step_1` and `step_2`
     * `fname` is accepted by `Serialize`
   * The `Dataset` or `MLDataset` `X` is run through the 3 steps
   * Note that the `xarray_filters` import statements at the top of the snippet are what we need to make work, based on this notebook
* Classes formerly part of `elm.pipeline.steps` will now inherit from `sklearn.base.BaseEstimator`


In [None]:
# `from __future__` imports must be the first statement in the cell; placed
# after other imports they raise a SyntaxError.
from __future__ import absolute_import, division, print_function, unicode_literals

from abc import ABCMeta, abstractmethod

import six
import sklearn
from sklearn.pipeline import Pipeline as _Pipeline

from xarray_filters import MLDataset
from xarray_filters.pipeline import Step, WriteNetCDF, Generic
from xarray_filters.tests.test_data import new_test_dataset

In [None]:
# Create a test dataset from the three given names and display it.
X = new_test_dataset(('wind', 'pressure', 'temperature',))
X

In [None]:
def step_1(dset, **kw):
    """Return ``a * mean ** b`` where ``mean`` is `dset` averaged over x and y.

    `a` and `b` are taken from the keyword arguments.  The power is applied
    to the mean before the multiplication by `a`.
    """
    scale = kw['a']
    xy_mean = dset.mean(dim=('x', 'y'))
    return scale * xy_mean ** kw['b']

def step_2(dset, **kw):
    """Return ``a + dset * b`` with `a` and `b` taken from the keyword arguments."""
    offset = kw['a']
    scaled = dset * kw['b']
    return offset + scaled

# Named (name, step) pairs: two Generic wrappers around step_1 and step_2,
# followed by a WriteNetCDF step constructed with 'two_step_pipeline_out.nc'.
steps = (
    ('s1', Generic(step_1)),
    ('s2', Generic(step_2)),
    ('s3', WriteNetCDF('two_step_pipeline_out.nc')),
)

In [None]:
# Pull the first and third step objects out of `steps`; the step names and
# the whole middle pair are discarded into `_`.
(_, s1), _, (_, s3) = steps

In [None]:
# Run the first step twice with different `kw` settings and compare the
# temperature results.
s1.set_params(kw=dict(a=0, b=0))
# NOTE(review): if `kw` is forwarded to step_1 unchanged, a=0, b=0 evaluates
# 0 * mean ** 0 == 0, so `ones` would hold zeros — confirm the naming.
ones = s1.transform(X)
s1.set_params(kw=dict(a=2, b=2))
other = s1.transform(X)
other.temperature - ones.temperature

In [None]:
# Generic subclass that declares `a`, `b` and `func` as class attributes with
# default values.
# NOTE(review): presumably this is what lets set_params(a=..., b=...) address
# them directly instead of through `kw` — confirm against Generic.
class Example(Generic):
    a = 1
    b = 2
    func = None
# Wrap each function in its own Example step and rebuild the step list,
# reusing the `s3` step unpacked in an earlier cell.
s1 = Example(func=step_1)
s2 = Example(func=step_2)
steps = (('s1', s1), ('s2', s2), ('s3', s3))

In [None]:
# Set both parameters directly on the step.
s1.set_params(a=2, b=3)

In [None]:
# Call fit_transform on the single step with the test dataset.
s1.fit_transform(X)

In [None]:
from elm.pipeline import Pipeline
# Replace `_validate_steps` on the Pipeline class itself (not on a single
# instance) with a stub that always returns True.
# NOTE(review): presumably a workaround so these steps pass validation —
# confirm, and drop the patch once it is no longer needed.
Pipeline._validate_steps = lambda x: True
pipe = Pipeline(steps=steps)

In [None]:
# Display the pipeline object.
pipe

In [None]:
# Show what get_params() returns for the pipeline.
pipe.get_params()

In [None]:
# Configure each named step through `step__param` keys, then pass X through
# the pipeline.
pipe.set_params(
    s1__a=2,
    s1__b=3,
    s2__a=0,
    s2__b=0,
    s3__fname='file_with_zeros.nc',
)
pipe.transform(X)

In [None]:
# Same run with s2's a and b set to 1 and a different s3 file name.
pipe.set_params(
    s1__a=2,
    s1__b=3,
    s2__a=1,
    s2__b=1,
    s3__fname='file_nonzero.nc',
)
pipe.transform(X)

In [None]:
! ls -l *.nc

In [None]:
from xarray_filters.datasets import make_regression
from xarray_filters.pipeline import Generic
from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.svm import SVR
# NOTE(review): this rebinds the name `steps`, which an earlier cell bound to
# the tuple of (name, step) pairs — confirm that is intended.
from elm.pipeline import steps
from sklearn.preprocessing import PolynomialFeatures
from xarray_filters import MLDataset
from xarray_filters.datasets import _make_base
from elm.model_selection.ea_searchcv import EaSearchCV
import numpy as np


In [None]:
# Rebind X to a regression dataset; n_samples is the product of the shape
# entries (30 * 33).
shape = (30, 33)
X = make_regression(shape=shape, n_samples=np.prod(shape))

In [None]:
# Display the regression dataset.
X

In [None]:
# Show the result of X.to_features().
X.to_features()

In [None]:
# Unpack the two values returned by X.to_array().
Xa, ya = X.to_array()

In [None]:
# Display both values unpacked from X.to_array().
Xa, ya

In [None]:
from elm.pipeline.steps import LinearRegression, PolynomialFeatures, PCA

In [None]:
# NOTE(review): these imports rebind the same three names that the previous
# cell imported from elm.pipeline.steps, so when cells run in order the
# sklearn classes are the ones in scope afterwards — confirm that is intended.
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.decomposition import PCA

In [None]:
from elm.pipeline.pipeline import Pipeline

In [None]:
# Chain PolynomialFeatures, PCA and LinearRegression as named pipeline steps.
pipe = Pipeline([
    ('poly', PolynomialFeatures()),
    ('pca', PCA()),
    ('reg', LinearRegression()),
])

In [None]:
# Display the regression pipeline.
pipe

In [None]:
# Fit the pipeline, passing only X.
pipe.fit(X)

In [None]:
# Call predict on the same X that was passed to fit.
pipe.predict(X)