Template for using our pipeline in a jupyter notebook

In [10]:
# imports
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
import pandas as pd
from src import configuration as config
from src.pipeline.pipeline_factory import PipelineFactory, ModelType, EvaluationType

In [11]:
# load the data
train_df = config.load_traindata_for_regression()
pipelineFactory = PipelineFactory()

# create the baseline pipeline
pipeline = pipelineFactory.create_pipeline(train_df,
                                                    ModelType.REGRE_BASELINE,
                                                    verbose_level=1,
                                                    evaluation=EvaluationType.BASIC)
pipeline.run()

Starting pipeline using method: EvaluationType.BASIC
0.0
Finished running the pipeline


In [12]:
from sklearn.linear_model import LinearRegression
# starting from the baseline model we can now change the estimator
pipeline.change_estimator(LinearRegression())

In [13]:
# we can now also add or remove steps
# supose we want to apply a step only on specific columns
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from src.pipeline.pipeline_transformers import OneHotEncoderTransformer


# define the column transformer
preprocessor = ColumnTransformer(
transformers=[
    ("some_name1", StandardScaler(), "column1, column2"),
    ("some_name2", StandardScaler(), "colum3"),
],
remainder="passthrough"
) 

# add the column transformer to the pipeline
pipeline.add_new_step(preprocessor, "preprocessor")

print(pipeline.get_pipeline().named_steps)

{'preprocessor': ColumnTransformer(remainder='passthrough',
                  transformers=[('some_name1', StandardScaler(),
                                 'column1, column2'),
                                ('some_name2', StandardScaler(), 'colum3')]), 'estimator': LinearRegression()}


In [14]:
# it is also possible to choose the position of the step
pipeline.add_new_step_at_position(StandardScaler(), "some_test_name", 0)

print(pipeline.get_pipeline().named_steps)

{'some_test_name': StandardScaler(), 'preprocessor': ColumnTransformer(remainder='passthrough',
                  transformers=[('some_name1', StandardScaler(),
                                 'column1, column2'),
                                ('some_name2', StandardScaler(), 'colum3')]), 'estimator': LinearRegression()}


In [15]:
# if we want to remove a step thats also possible
pipeline.remove_step("preprocessor")

print(pipeline.get_pipeline().named_steps)

{'some_test_name': StandardScaler(), 'estimator': LinearRegression()}


In [16]:
# self made custom tranformers can also be added
# please add the code to the src\pipeline\pipeline_transformers.py file
from src.pipeline.pipeline_transformers import ColumnDropper

pipeline.add_new_step(ColumnDropper(["column1", "column2"]), "column_dropper")

In [21]:
# One-Hot-Encoder
from src.pipeline.pipeline_transformers import OneHotEncoderTransformer2

pipeline.add_new_step(OneHotEncoderTransformer2(), "one_hot_encoder")

In [18]:
print(pipeline.get_pipeline().named_steps)

{'some_test_name': StandardScaler(), 'column_dropper': ColumnDropper(cols_to_drop=['column1', 'column2']), 'one_hot_encoder': OneHotEncoderTransformer2(), 'estimator': LinearRegression()}
