In [1]:
# hide
# default_exp tests.core.test_compose
from nbdev.showdoc import *
from block_types.utils.nbdev_utils import nbdev_setup, TestRunner

# Notebook bootstrap helper imported from block_types.utils.nbdev_utils.
# NOTE(review): what it configures is not visible in this notebook -- see its definition.
nbdev_setup ()
# Shared runner: the cells below call `tst.run (<test function>, tag='dummy')`.
# NOTE(review): presumably a test runs only when its tag is listed in `targets` -- confirm in TestRunner.
tst = TestRunner (targets=['dummy'])

# Test compose

In [2]:
# export
import pytest
import os
import joblib
from IPython.display import display
import pandas as pd
import numpy as np
import time
from pathlib import Path

from sklearn.preprocessing import StandardScaler
from sklearn.utils import Bunch
from sklearn.preprocessing import FunctionTransformer
from sklearn.model_selection import KFold

from block_types.core.compose import *
from block_types.core.block_types import Component, PandasComponent, PickleSaverComponent
from block_types.core.utils import PickleIO
from block_types.utils.utils import remove_previous_results
from block_types.core.data_conversion import DataConverter, PandasConverter

import block_types.config.bt_defaults as dflt
from block_types.utils.utils import check_last_part

In [3]:
#export
@pytest.fixture (name='column_transformer_data')
def column_transformer_data_fixture():
    """Pytest fixture `column_transformer_data`: returns whatever `column_transformer_data()` produces.

    The wrapped function is not defined in this notebook; it has to be in scope
    at module level (NOTE(review): presumably through one of the imports above).
    """
    data = column_transformer_data()
    return data

@pytest.fixture (name='multi_split_data')
def multi_split_data_fixture():
    """Pytest fixture `multi_split_data`: returns whatever `multi_split_data()` produces.

    The wrapped function is not defined in this notebook; it has to be in scope
    at module level (NOTE(review): presumably through one of the imports above).
    """
    data = multi_split_data()
    return data

## Parallel

### `find_last_fitted_model`

#### Using Sequential

##### First test / example

In [4]:
# export
from block_types.utils.dummies import make_pipe_fit1

def test_pipeline_find_last_fitted_model_seq_others ():
    """Check that a sequential pipeline can resume after only some components were run.

    A reference pipeline built without `path_results` is fitted on `X` to obtain the
    expected output. Then, in three cases, a fresh pipeline that stores its results
    under `path_results` has only its first component(s) run by hand, after which:

    - `find_last_fitted_model ()` must report that the pipeline is not fully fitted;
    - the components that were run by hand get `raise_error = True`;
    - `fit_apply (None)` is called on the whole pipeline.

    Case 1 (only A was run) expects that call to raise `RuntimeError`; cases 2 and 3
    expect it to reproduce the reference output.
    NOTE(review): `raise_error` presumably makes the dummy component raise when it is
    executed again -- confirm in `block_types.utils.dummies`.
    """
    # NOTE(review): the folder name ends in `_seq_start` although this test is `_seq_others`;
    # if another test uses the same folder they share on-disk state -- confirm this is intended.
    path_results = 'test_pipeline_find_last_fitted_model_seq_start'
    remove_previous_results (path_results=path_results)

    # reference run: gives the output every resumed pipeline is compared against
    reference_pipe = make_pipe_fit1 ()
    X = np.array([1,2,3]).reshape(-1,1)
    expected = reference_pipe.fit_apply (X)

    # case 1: only component A was run beforehand
    pipe = make_pipe_fit1 (path_results=path_results, verbose=2)
    pipe.A.fit_apply (X)
    assert not pipe.find_last_fitted_model (), 'case 1: pipeline wrongly reported as fully fitted'
    pipe.A.raise_error = True
    # with only A's output available, the test expects the full run to fail
    with pytest.raises (RuntimeError):
        pipe.fit_apply (None)
    remove_previous_results (path_results=path_results)

    # case 2: components A and B were run beforehand
    pipe = make_pipe_fit1 (path_results=path_results, verbose=2)
    partial = pipe.A.fit_apply (X)
    pipe.B.fit_apply (partial)
    assert not pipe.find_last_fitted_model (), 'case 2: pipeline wrongly reported as fully fitted'
    pipe.A.raise_error = True
    result = pipe.fit_apply (None)
    assert (expected==result).all(), 'case 2: resumed pipeline differs from the reference output'
    remove_previous_results (path_results=path_results)

    # case 3: components A, B and C were run beforehand
    pipe = make_pipe_fit1 (path_results=path_results, verbose=2)
    partial = pipe.A.fit_apply (X)
    partial = pipe.B.fit_apply (partial)
    pipe.C.fit_apply (partial)
    assert not pipe.find_last_fitted_model (), 'case 3: pipeline wrongly reported as fully fitted'
    pipe.A.raise_error = True
    pipe.B.raise_error = True
    # swap the in-memory estimators of B and C for empty Bunch objects
    # NOTE(review): presumably so that a correct result can only come from what was stored
    # under `path_results` -- confirm.
    pipe.B.estimator = Bunch ()
    pipe.C.estimator = Bunch ()
    result = pipe.fit_apply (None)
    assert (expected==result).all(), 'case 3: resumed pipeline differs from the reference output'
    remove_previous_results (path_results=path_results)

In [5]:
# Run the test defined in the previous cell from inside the notebook, through the shared runner `tst`.
tst.run (test_pipeline_find_last_fitted_model_seq_others, tag='dummy')

running test_pipeline_find_last_fitted_model_seq_others


applying A (on whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_seq_start/whole/A_result.pk
fitting pipeline (using whole data)
applying A (on whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_seq_start/whole/A_result.pk
fitting B (using whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_seq_start/models/B_estimator.pk
applying B (on whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_seq_start/whole/B_result.pk
fitting pipeline (using whole data)
fitting B (using whole data)
loading from /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_seq_start/models/B_estimator.pk
loaded pre-trained B
applying B (on whole data)
loading from /home/jcidatascience/jaume/workspace/

##### Second test / example

#### Using Parallel

In [6]:
# export
from block_types.utils.dummies import make_pipe_fit2

def _resolve_component (pipe, dotted_name):
    """Return the object reached from `pipe` by following the attributes in `dotted_name` (e.g. 'obj.B3a')."""
    target = pipe
    for attribute in dotted_name.split ('.'):
        target = getattr (target, attribute)
    return target

def _fit_apply_chain (pipe, dotted_names, data):
    """Call `fit_apply` on each named component in order, feeding each the previous output; return the last output."""
    for dotted_name in dotted_names:
        data = _resolve_component (pipe, dotted_name).fit_apply (data)
    return data

def _check_parallel_resume (X, expected, path_results, label, factory_kwargs, trunk, branches,
                            blocked, reset, expect_all_fitted):
    """Run one resume scenario on a fresh `make_pipe_fit2` pipeline and return that pipeline.

    Parameters
    ----------
    X : input given to the first component of `trunk`.
    expected : output of the reference pipeline; the resumed run must equal it.
    path_results : folder given to the pipeline; it is removed again at the end.
    label : scenario name, only used in assertion messages.
    factory_kwargs : extra keyword arguments for `make_pipe_fit2`.
    trunk : dotted names of components run one after another, starting from `X`.
    branches : list of chains (lists of dotted names); each chain starts from the trunk output.
    blocked : dotted names of components that get `raise_error = True` before resuming.
    reset : dotted names of components on which `create_estimator ()` is called before resuming.
    expect_all_fitted : expected truth value of `find_last_fitted_model ()` after the manual runs.
    """
    pipe = make_pipe_fit2 (path_results=path_results, verbose=2, root=True, **factory_kwargs)

    # run part of the pipeline by hand
    trunk_output = _fit_apply_chain (pipe, trunk, X)
    for branch in branches:
        _fit_apply_chain (pipe, branch, trunk_output)

    all_fitted = pipe.find_last_fitted_model ()
    assert bool (all_fitted) == expect_all_fitted, (
        f'{label}: find_last_fitted_model returned {all_fitted!r}')

    # NOTE(review): `raise_error` presumably makes a dummy component raise if it is executed
    # again, and `create_estimator` presumably replaces the in-memory estimator with a new
    # one -- confirm both in `block_types.utils.dummies` / `block_types.core`.
    for dotted_name in blocked:
        _resolve_component (pipe, dotted_name).raise_error = True
    for dotted_name in reset:
        _resolve_component (pipe, dotted_name).create_estimator ()

    pipe.logger.info (f'\n{"-"*100}\n')
    result = pipe.fit_apply (None)
    assert (expected==result).all(), f'{label}: resumed pipeline differs from the reference output'

    remove_previous_results (path_results=path_results)
    return pipe

def test_pipeline_find_last_fitted_model_parallel_2 ():
    """Check that a pipeline containing a parallel section resumes from partially stored results.

    A reference pipeline built without `path_results` gives the expected output. Each
    scenario then builds a fresh pipeline that stores results under `path_results`, runs
    some components by hand, checks what `find_last_fitted_model ()` reports, flags
    components with `raise_error = True`, calls `create_estimator ()` on others, and
    finally requires `fit_apply (None)` to reproduce the reference output.

    Names without a prefix ('A0') are read directly from the pipeline; names starting
    with 'obj.' are read from its `obj` attribute, exactly as written in each scenario.
    """
    path_results = 'test_pipeline_find_last_fitted_model_parallel_2'
    remove_previous_results (path_results=path_results)

    # reference run: gives the output every resumed pipeline is compared against
    reference_pipe = make_pipe_fit2 ()
    X = np.array([1,2,3]).reshape(-1,1)
    expected = reference_pipe.fit_apply (X)

    trunk = ['A0', 'A1']
    b3_chain = ['obj.B3a', 'obj.B3b', 'obj.B3c', 'obj.B3d']
    b4_chain = ['obj.B4a', 'obj.B4b', 'obj.B4c', 'obj.B4d', 'obj.B4e']

    # components flagged when the branches were run one component at a time
    blocked_partial = ['A0', 'A1',
                       'obj.B3a', 'obj.B3b', 'obj.B3c',
                       'obj.B4a', 'obj.B4b', 'obj.B4c', 'obj.B4d']
    # components flagged when the whole parallel section was run
    blocked_parallel = (['A0', 'A1', 'obj.B1', 'obj.B2']
                        + b3_chain
                        + b4_chain
                        + ['obj.B5'])
    reset = ['A1', 'obj.B2', 'obj.B3a', 'obj.B3b', 'obj.B4b', 'obj.B4c', 'obj.B4e', 'obj.B5']

    scenarios = [
        dict (label='branches run component by component',
              factory_kwargs={},
              trunk=trunk,
              branches=[['obj.B1'], ['obj.B2'], b3_chain, b4_chain, ['obj.B5']],
              blocked=blocked_partial,
              reset=reset,
              expect_all_fitted=False),
        dict (label='fourth branch run through pipeline_1_1',
              factory_kwargs={},
              trunk=trunk,
              branches=[['obj.B1'], ['obj.B2'], b3_chain, ['obj.pipeline_1_1'], ['obj.B5']],
              blocked=blocked_partial,
              reset=reset,
              expect_all_fitted=False),
        dict (label='whole parallel section run',
              factory_kwargs={},
              trunk=trunk + ['obj.parallel'],
              branches=[],
              blocked=blocked_parallel,
              reset=reset,
              expect_all_fitted=False),
        dict (label='new_parallel and C run',
              factory_kwargs=dict (new_parallel=True),
              trunk=trunk + ['obj.new_parallel', 'C'],
              branches=[],
              blocked=blocked_parallel + ['new_parallel'],
              reset=reset,
              expect_all_fitted=False),
        dict (label='new_parallel, C and D run',
              factory_kwargs=dict (new_parallel=True),
              trunk=trunk + ['obj.new_parallel', 'C', 'D'],
              branches=[],
              blocked=blocked_parallel + ['new_parallel', 'C'],
              reset=reset + ['D'],
              expect_all_fitted=True),
    ]

    previous_pipe = None
    for scenario in scenarios:
        if previous_pipe is not None:
            # visual separator between scenarios, written through the previous pipeline's logger
            previous_pipe.logger.info (f'\n{"*"*100}\n{"*"*100}\n')
        previous_pipe = _check_parallel_resume (X, expected, path_results, **scenario)

In [7]:
# Run the test defined in the previous cell from inside the notebook, through the shared runner `tst`.
tst.run (test_pipeline_find_last_fitted_model_parallel_2, tag='dummy')

running test_pipeline_find_last_fitted_model_parallel_2
parallel class: <class 'block_types.core.compose.Parallel'>


applying A0 (on whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_parallel_2/whole/A0_result.pk
fitting A1 (using whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_parallel_2/models/A1_estimator.pk
applying A1 (on whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_parallel_2/whole/A1_result.pk
applying B1 (on whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_parallel_2/whole/B1_result.pk
fitting B2 (using whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_parallel_2/models/B2_estimator.pk
applying B2 (on whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_parallel_2/whole/B2_re

parallel class: <class 'block_types.core.compose.Parallel'>


applying B3a (on whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_parallel_2/whole/B3a_result.pk
fitting B3b (using whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_parallel_2/models/B3b_estimator.pk
applying B3b (on whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_parallel_2/whole/B3b_result.pk
applying B3c (on whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_parallel_2/whole/B3c_result.pk
fitting B3d (using whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_parallel_2/models/B3d_estimator.pk
applying B3d (on whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_parallel_2/

parallel class: <class 'block_types.core.compose.Parallel'>


saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_parallel_2/models/B3b_estimator.pk
applying B3b (on whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_parallel_2/whole/B3b_result.pk
applying B3c (on whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_parallel_2/whole/B3c_result.pk
fitting B3d (using whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_parallel_2/models/B3d_estimator.pk
applying B3d (on whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_parallel_2/whole/B3d_result.pk
fitting pipeline_1_1 (using whole data)
applying B4a (on whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_pa

parallel class: <class 'block_types.core.compose.Parallel'>


applying B3a (on whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_parallel_2/whole/B3a_result.pk
fitting B3b (using whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_parallel_2/models/B3b_estimator.pk
applying B3b (on whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_parallel_2/whole/B3b_result.pk
applying B3c (on whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_parallel_2/whole/B3c_result.pk
fitting B3d (using whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_parallel_2/models/B3d_estimator.pk
applying B3d (on whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_parallel_2/

parallel class: <class 'block_types.utils.dummies.NewParallel'>


fitting B3a (using whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_parallel_2/models/B3a_estimator.pk
applying B3a (on whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_parallel_2/whole/B3a_result.pk
fitting B3b (using whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_parallel_2/models/B3b_estimator.pk
applying B3b (on whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_parallel_2/whole/B3b_result.pk
applying B3c (on whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_parallel_2/whole/B3c_result.pk
fitting B3d (using whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_parallel_

parallel class: <class 'block_types.utils.dummies.NewParallel'>


saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_parallel_2/whole/B3a_result.pk
fitting B3b (using whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_parallel_2/models/B3b_estimator.pk
applying B3b (on whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_parallel_2/whole/B3b_result.pk
applying B3c (on whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_parallel_2/whole/B3c_result.pk
fitting B3d (using whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_parallel_2/models/B3d_estimator.pk
applying B3d (on whole data)
saving to /home/jcidatascience/jaume/workspace/remote/temp/block-types/test_pipeline_find_last_fitted_model_parallel_2/whole/B3d_result.pk
saving to

## Data conversion

In [None]:
#export
from block_types.utils.dummies import (DataSource, SumXY, MaxOfPositiveWithSeparateLabels, Sum1direct,
                                       Multiply10direct, subtract_xy, MinOfPositiveWithoutSeparateLabels)

def test_data_conversion_for_sequential_and_parallel ():
    """Build a Sequential pipeline whose components use different data converters.

    The pipeline mixes a custom converter class (`SumXYConverter`), converters given by
    name (the string 'PandasConverter'), a `PandasConverter` instance, a `Parallel`
    block and a column transformer.

    NOTE: the pipeline is only constructed; the call that would execute it is commented
    out at the end, so this test currently checks nothing beyond successful construction.
    """
    class SumXYConverter (DataConverter):
        """Converter that sets the third element of the input aside and re-attaches it to the result."""
        def convert_before_transforming (self, X, **kwargs):
            """Store `X[2]` in `self.label` and return the pair `(X[0], X[1])`."""
            self.label = X[2]
            return X[0], X[1]
        def convert_after_transforming (self, result, **kwargs):
            """Write the stored `self.label` into `result['label']` and return `result`."""
            result['label'] = self.label
            return result

    # SumXY receives the converter class itself, whereas other components receive either
    # the string 'PandasConverter' or a PandasConverter instance.
    pipe = Sequential (DataSource (),
                       SumXY (data_converter=SumXYConverter),
                       PandasComponent (apply=lambda X: X*2),
                       MaxOfPositiveWithSeparateLabels (data_converter='PandasConverter'),
                       Parallel (Sum1direct (data_converter='PandasConverter'), 
                                 Multiply10direct (data_converter='PandasConverter'),
                                 finalize_result=lambda X: tuple(X)),
                       PandasComponent(apply=subtract_xy),
                       MinOfPositiveWithoutSeparateLabels (
                           data_converter=PandasConverter (separate_labels=False)),
                       make_column_transformer ((Multiply10direct (), ['a','b']), 
                                                (Sum1direct (), ['c','d'])),
                       Sum1direct ())
    
    # TODO: the execution step below is disabled, so `pipe` is never used.
    #result = pipe.fit_apply ()

In [None]:
#tst.run (test_data_conversion_for_sequential_and_parallel, tag='dummy')