In [34]:
# hide
# default_exp core.block_types
import os
from nbdev.showdoc import *
if not os.path.exists('settings.ini'):
    os.chdir('..')
    
from block_types.core.block_types import __all__

# Block types

> Types of blocks

In [35]:
#export
from functools import partialmethod
from typing import Optional
import copy
import pickle
from pathlib import Path

from sklearn.base import BaseEstimator, ClassifierMixin, TransformerMixin
import numpy as np
import pandas as pd
import pyarrow.parquet as pq
import pyarrow as pa
import joblib
from IPython.display import display

try:
    from graphviz import *
    imported_graphviz = True
except:
    imported_graphviz = False

# block_types
from block_types.core.data_conversion import DataConverter, NoConverter, PandasConverter
from block_types.core.utils import save_csv, save_parquet, save_multi_index_parquet, save_keras_model, save_csv_gz, read_csv, read_csv_gz
from block_types.core.utils import DataIO, SklearnIO, PandasIO, NoSaverIO, ModelPlotter
from block_types.core.utils import camel_to_snake
from block_types.config import defaults as dflt
from block_types.utils.utils import set_logger

## Component

In [36]:
class A():
    def f(self, x=1, y=2, func=None):
        print (x, y, func)
        getattr(self,func)(x, y)
    def h (self, x=1, y=2):
        print (x*2+y)
    def g (self, x=1, y=2):
        print (x, y)
        print (x*3+y)
    z = partialmethod (f, func='g', x=2)

a = A()
a.z (y=3)

2 3 g
2 3
9


In [37]:
#export

class Component (ClassifierMixin, TransformerMixin, BaseEstimator):
    """Base component class used in our Pipeline."""
    def __init__ (self,
                  estimator=None,
                  name: Optional[str] = None,
                  data_converter: Optional[DataConverter] = None,
                  data_io: Optional[DataIO] = None,
                  model_plotter: Optional[ModelPlotter] = None,
                  logger=None,
                  verbose: int = 0,
                  **kwargs):
        
        """
        Initialize attributes and fields.
        
        Parameters
        ----------
        estimator : estimator (classifier or transformer) or None, optional
            Estimator being wrapped.
        name : Pipeline or None, optional
            Name of component. If not provided, it is inferred from the name of the 
            estimator's class, or the name of the custom class defining the componet.
        data_converter : DataConverter or None, optional
            Converts incoming data to format expected by component, and convert 
            outgoing result to format expected by caller.
        data_io : DataIO or None, optional
            Manages data serialization and deserialization.
        model_plotter : ModelPlotter or None, optional
            Helper object that allows to retrieve information to be shown about this 
            component, as part of a Pipeline diagram.
        logger : logging.logger or None, optional
            Logger used to write messages
        verbose : int, optional
            Verbosity, 0: warning or critical, 1: info, 2: debug.
        """

        # logger used to display messages
        if logger is None:
            self.logger = set_logger ('block_types', verbose=verbose)
        else:
            self.logger = logger

        # name of current component, for logging and plotting purposes
        self._determine_component_name (name, estimator)

        # object that manages loading / saving
        if data_io is None:
            self.data_io = DataIO (component=self, **kwargs)
        else:
            self.data_io = copy.copy(data_io)
            self.data_io.setup (self)

        # estimator (ML model)
        self.estimator = estimator

        # data converter
        if data_converter is None:
            self.data_converter = NoConverter ()
        else:
            self.data_converter = data_converter

        # plotting model component
        if model_plotter is None:
            self.model_plotter = ModelPlotter (component=self, **kwargs)
        else:
            self.model_plotter = model_plotter
            self.model_plotter.set_component (self)

    def _determine_component_name (self, name: Optional[str], estimator) -> None:
        """
        Determines an appropriate name for the component if not provided by input.
        
        If not provided, it is inferred from the name of the estimator's class, or 
        the name of the custom class defining the componet.
        """
        self.class_name = self.__class__.__name__
        if (self.class_name in __all__) and (estimator is not None):
            self.class_name = estimator.__class__.__name__

        if name is not None:
            self.name = name
        else:
            self.name = camel_to_snake (self.class_name)
            
    def fit_like (self, X, y=None, load=True, save=True, func='_fit', **kwargs):
        """
        Estimates the parameters of the component based on given data X and labels y.
        
        Uses the previously fitted parameters if they're found in disk and overwrite 
        is False.
        """
        self.logger.info (f'fitting {self.name}')
        
        previous_estimator = None
        if load and not self.data_io.overwrite:
            previous_estimator = self.data_io.load_estimator()
            
        if previous_estimator is None:
            X, y = self.data_converter.convert_before_fitting (X, y)
            if func=='_fit':
                if len(kwargs) > 0:
                    raise AttributeError (f'kwargs: {kwargs} not valid')
                self._fit (X, y)
            elif func=='__fit_apply':
                result = self.__fit_apply (X, y, **kwargs)
            else:
                raise ValueError (f'function {func} not valid')
            self.data_converter.convert_after_fitting (X)
            if save:
                self.data_io.save_estimator ()
        else:
            self.estimator = previous_estimator
            self.logger.info (f'loaded pre-trained {self.name}')
        if func=='_fit':
            return self
        else:
            return result
    
    def __fit_apply (self, X, y, **kwargs):
        if callable(getattr(self, '_fit_apply', None)):
            return self._fit_apply (X, y, **kwargs)
        else:
            return self.fit (X, y).apply (X, **kwargs)
     
    fit = partialmethod (fit_like, func='_fit')
    fit_apply = partialmethod (fit_like, func='__fit_apply')
    
    # aliases
    fit_transform = fit_apply
    fit_predict = fit_apply

    def apply (self, *X, load=True, save=True, **kwargs):
        """
        Transforms the data X and returns the transformed data.
        
        Uses the previously transformed data if it's found in disk and overwrite 
        is False.
        """
        self.logger.info (f'applying {self.name} transform')
        result_func = self._determine_result_func ()
        result = self._compute_result (X, result_func, load=load, save=save, **kwargs)
        return result

    def _determine_result_func (self):
        implemented = []
        if callable(getattr(self, '_apply', None)):
            result_func = self._apply
            implemented += [result_func]
        if callable(getattr(self, '_transform', None)):
            result_func = self._transform
            implemented += [result_func]
        if callable(getattr(self, '_predict', None)):
            result_func = self._predict
            implemented += [result_func]
        if self.estimator is not None and callable(getattr(self.estimator, 'transform', None)):
            result_func = self.estimator.transform
            implemented += [result_func]
        if self.estimator is not None and callable(getattr(self.estimator, 'predict', None)):
            result_func = self.estimator.predict
            implemented += [result_func]
        if len (implemented) == 0:
            raise AttributeError (f'{self.class_name} must have one of _transform, _apply, or _predict methods implemented\n'
                                  f'Otherwise, self.estimator must have either predict or transform methods')
        if len(implemented) > 1:
            raise AttributeError (f'{self.class_name} must have only one of _transform, _apply, '
                                  f'or _predict methods implemented => found: {implemented}')
        return result_func
    
    # aliases for transform method
    __call__ = apply
    transform = apply
    predict = partialmethod (apply, new_columns=['prediction'])

    def _compute_result (self, X, result_func, load=True, save=True, **kwargs):
        if len(X) == 1:
            X = X[0]
        previous_result = None
        if load and not self.data_io.overwrite:
            previous_result = self.data_io.load_result()
        if previous_result is None:
            X = self.data_converter.convert_before_transforming (X, **kwargs)
            if type(X) is tuple:
                result = result_func (*X)
            else:
                result = result_func (X)
            result = self.data_converter.convert_after_transforming (result, **kwargs)
            if save:
                self.data_io.save_result (result)
        else:
            result = previous_result
            self.logger.info (f'loaded pre-computed result')
        return result


    def _fit (self, X, y=None):
        if self.estimator is not None:
            self.estimator.fit (X, y)
            
    def show_result_statistics (self, result=None, training_data_flag=False) -> None:
        """
        Show statistics of transformed data.
        
        Parameters
        ----------
        result: DataFrame or other data structure or None, optional
            Transformed data whose statistics we show. If not provided, it is loaded 
            from disk.
        training_data_flag: bool, optional
            If True, transformed training data is loaded, otherwise transformed test 
            data is loaded.
        """
        if result is None:
            self.set_training_data_flag (training_data_flag)
            df = self.data_io.load_result()
        else:
            df = result
        
        if df is not None:
            display (self.name)
            if callable(getattr(df, 'describe', None)):
                display (df.describe())

    def assert_equal (self, path_reference_results: str, assert_equal_func=pd.testing.assert_frame_equal, **kwargs):
        """
        Check whether the transformed data is the same as the reference data stored in given path.
        
        Parameters
        ----------
        path_reference_results: str
            Path where reference results are stored. The path does not include the 
            file name, since this is stored as a field of data_io.
        assert_equal_func: function, optional
            Function used to check whether the values are the same. By defaut, 
            `pd.testing.assert_frame_equal` is used, which assumes the data type is 
            DataFrame.
        
        """
        type_result = 'training' if self.data_io.training_data_flag else 'test'
        self.logger.info (f'comparing {type_result} results for {self.class_name}')
        
        self.logger.info (f'loading...')
        current_results = self.data_io.load_result ()
        if self.data_io.training_data_flag:
            path_to_reference_file = Path(path_reference_results) / self.data_io.result_file_name_training
        else:
            path_to_reference_file = Path(path_reference_results) / self.data_io.result_file_name_test
        reference_results = self.data_io._load (path_to_reference_file, self.data_io.result_load_func)
        self.logger.info (f'comparing...')
        assert_equal_func (current_results, reference_results, **kwargs)
        self.logger.info (f'equal results\n')

    # ********************************
    # exposing some data_io and data_converters methods
    # ********************************
    def load_estimator (self):
        estimator = self.data_io.load_estimator ()
        if estimator is not None:
            self.estimator = estimator
        
    # ********************************
    # setters
    # ********************************
    def set_training_data_flag (self, training_data_flag):
        self.data_io.set_training_data_flag (training_data_flag)

    def set_save_result_flag_test (self, save_result_flag_test):
        self.data_io.set_save_result_flag_test (save_result_flag_test)

    def set_save_result_flag_training (self, save_result_flag_training):
        self.data_io.set_save_result_flag_training (save_result_flag_training)

    def set_save_result_flag (self, save_result_flag):
        self.data_io.set_save_result_flag (save_result_flag)

    def set_overwrite (self, overwrite):
        self.data_io.set_overwrite (overwrite)

    def set_save_fitting (self, save_fitting):
        self.data_io.set_save_fitting (save_fitting)

# ******************************************
# Subclasses of Component.
# Most of these are basically the same as GenericComponent, the only difference being that some parameters
# are over-riden when constructing the object, to force a specific behavior
# ******************************************

#### Transform method called with different aliases

In [38]:
import pytest

# test that we can implement _transform and use all the aliases 
# (transform, predict, apply,  __call__)
class MyTransform (Component):
    def _apply (self, x):
        return x*2

my_transform = MyTransform()
assert my_transform.transform (3) == 6
assert my_transform.predict (3) == 6
assert my_transform.apply (3) == 6
assert my_transform (3) == 6

# test that we can implement _apply and use all the aliases 
# (transform, predict, apply and __call__)
class MyTransform2 (Component):
    def _apply (self, x):
        return x*2

my_transform2 = MyTransform2()
assert my_transform2.transform (3) == 6
assert my_transform2.predict (3) == 6
assert my_transform2.apply (3) == 6
assert my_transform2 (3) == 6

# test that we can implement _predict and use all the aliases 
# (transform, predict, apply and __call__)
class MyTransform3 (Component):
    def _predict (self, x):
        return x*2

my_transform3 = MyTransform3()
assert my_transform3.transform (3) == 6
assert my_transform3.predict (3) == 6
assert my_transform3.apply (3) == 6
assert my_transform3 (3) == 6

# test that an exception is raised if neither _tranform nor _apply are defined
class MyTransform4 (Component):
    def _wrong_method (self, x):
        return x*2
 
my_transform4 = MyTransform4 ()

import pytest
with pytest.raises(Exception):
    my_transform4.transform(3)
    

# test that an exception is raised if more than one alias is implemented
class MyTransform5 (Component):
    def _predict (self, x):
        return x*2
    def _apply (self, x):
        return x*2
 
my_transform5 = MyTransform5 ()

import pytest
with pytest.raises(Exception):
    my_transform5.transform(3)

applying my_transform transform
applying my_transform transform
applying my_transform transform
applying my_transform transform
applying my_transform2 transform
applying my_transform2 transform
applying my_transform2 transform
applying my_transform2 transform
applying my_transform3 transform
applying my_transform3 transform
applying my_transform3 transform
applying my_transform3 transform
applying my_transform4 transform
applying my_transform5 transform


#### Calling `predict` is handy when the result is a single array of predictions

In [39]:
# TODO: remove this cell
if False:
    class MyTransform (Component):
        def __init__ (self, **kwargs):
            super().__init__ (
                data_converter=PandasConverter(**kwargs),
                **kwargs)

        def _predict (self, x):
            return x['a']+x['b']

    my_transform = MyTransform()

    df = pd.DataFrame ({'a': [10,20,30],'b':[4,5,6]})

    pd.testing.assert_frame_equal(my_transform.transform (df).to_frame(), 
                                  pd.DataFrame ({0: [14,25,36]})
                                 )

    if False:
        pd.testing.assert_frame_equal(my_transform.predict (df), 
                                      pd.DataFrame ({0: [14,25,36]})
                                     )

#### The `transform` method and its aliases can be called with multiple inputs

In [40]:
# test that we can apply tranform to multiple data items
class MyTransform (Component):
    def _apply (self, x, y):
        return x+y

my_transform = MyTransform ()
result = my_transform.transform (3, 4)
print (result)
assert result==7

# test that we can apply tranform to single data items
class MyTransform2 (Component):
    def _apply (self, x):
        return x*2

my_transform2 = MyTransform2 ()
result = my_transform2.transform (3)
print (result)
assert result==6

applying my_transform transform
applying my_transform2 transform


7
6


#### `fit_apply()` and its aliases `fit_transform(), fit_predict()`

`_fit_apply()` is called when implemented, otherwise `fit().apply()` is called

In [41]:
import numpy as np 

# example with _fit_apply implemented
class Transform1 (Component):
    def __init__ (self, **kwargs):
        super().__init__ (**kwargs)
    def _fit (self, X, y=None):
        self.sum = X.sum(axis=0)
    def _apply (self, X):
        return X + self.sum
    def _fit_apply (self, X, y=None):
        self.sum = X.sum(axis=0)*10
        return X + self.sum

tr1 = Transform1 ()
X = np.array ([100, 90, 10])
result = tr1.fit_apply (X)
assert (result==(X+2000)).all()

# same result obtained by aliases
result = tr1.fit_transform (X)
assert (result==(X+2000)).all()
    
# example without _fit_apply implemented
class Transform2 (Component):
    def __init__ (self, **kwargs):
        super().__init__ (**kwargs)
    def _fit (self, X, y=None):
        self.sum = X.sum(axis=0)
    def _apply (self, X):
        return X + self.sum

tr2 = Transform2 ()
result = tr2.fit_apply (X)
assert (result==(X+200)).all()

# same result obtained by aliases
result = tr2.fit_transform (X)
assert (result==(X+200)).all()

fitting transform1
fitting transform1
fitting transform2
fitting transform2
applying transform2 transform
fitting transform2
fitting transform2
applying transform2 transform


In [42]:
show_doc (Component, name='Component', title_level=3)
show_doc (Component.__init__, name='__init__', title_level=4)
show_doc (Component.fit, name='fit', title_level=4)
show_doc (Component.transform, name='transform', title_level=4)
show_doc (Component.predict, name='predict', title_level=4)
show_doc (Component.show_result_statistics, name='show_result_statistics', title_level=4)

<h3 id="Component" class="doc_header"><code>class</code> <code>Component</code><a href="" class="source_link" style="float:right">[source]</a></h3>

> <code>Component</code>(**`estimator`**=*`None`*, **`name`**:`Optional`\[`str`\]=*`None`*, **`data_converter`**:`Optional`\[[`DataConverter`](/core.data_conversion.html#DataConverter)\]=*`None`*, **`data_io`**:`Optional`\[[`DataIO`](/core.utils.html#DataIO)\]=*`None`*, **`model_plotter`**:`Optional`\[[`ModelPlotter`](/core.utils.html#ModelPlotter)\]=*`None`*, **`logger`**=*`None`*, **`verbose`**:`int`=*`0`*, **\*\*`kwargs`**) :: `ClassifierMixin`

```
Base component class used in our Pipeline.
```

<h4 id="__init__" class="doc_header"><code>__init__</code><a href="__main__.py#L5" class="source_link" style="float:right">[source]</a></h4>

> <code>__init__</code>(**`estimator`**=*`None`*, **`name`**:`Optional`\[`str`\]=*`None`*, **`data_converter`**:`Optional`\[[`DataConverter`](/core.data_conversion.html#DataConverter)\]=*`None`*, **`data_io`**:`Optional`\[[`DataIO`](/core.utils.html#DataIO)\]=*`None`*, **`model_plotter`**:`Optional`\[[`ModelPlotter`](/core.utils.html#ModelPlotter)\]=*`None`*, **`logger`**=*`None`*, **`verbose`**:`int`=*`0`*, **\*\*`kwargs`**)

```
Initialize attributes and fields.

Parameters
----------
estimator : estimator (classifier or transformer) or None, optional
    Estimator being wrapped.
name : Pipeline or None, optional
    Name of component. If not provided, it is inferred from the name of the 
    estimator's class, or the name of the custom class defining the componet.
data_converter : DataConverter or None, optional
    Converts incoming data to format expected by component, and convert 
    outgoing result to format expected by caller.
data_io : DataIO or None, optional
    Manages data serialization and deserialization.
model_plotter : ModelPlotter or None, optional
    Helper object that allows to retrieve information to be shown about this 
    component, as part of a Pipeline diagram.
logger : logging.logger or None, optional
    Logger used to write messages
verbose : int, optional
    Verbosity, 0: warning or critical, 1: info, 2: debug.
```

<h4 id="fit" class="doc_header"><code>fit</code><a href="functools.py#L371" class="source_link" style="float:right">[source]</a></h4>

> <code>fit</code>(**`X`**, **`y`**=*`None`*, **`load`**=*`True`*, **`save`**=*`True`*, **`func`**=*`'_fit'`*, **\*\*`kwargs`**)



<h4 id="transform" class="doc_header"><code>transform</code><a href="__main__.py#L134" class="source_link" style="float:right">[source]</a></h4>

> <code>transform</code>(**\*`X`**, **`load`**=*`True`*, **`save`**=*`True`*, **\*\*`kwargs`**)

```
Transforms the data X and returns the transformed data.

Uses the previously transformed data if it's found in disk and overwrite 
is False.
```

<h4 id="predict" class="doc_header"><code>predict</code><a href="functools.py#L371" class="source_link" style="float:right">[source]</a></h4>

> <code>predict</code>(**\*`X`**, **`load`**=*`True`*, **`save`**=*`True`*, **\*\*`kwargs`**)



<h4 id="show_result_statistics" class="doc_header"><code>show_result_statistics</code><a href="__main__.py#L201" class="source_link" style="float:right">[source]</a></h4>

> <code>show_result_statistics</code>(**`result`**=*`None`*, **`training_data_flag`**=*`False`*)

```
Show statistics of transformed data.

Parameters
----------
result: DataFrame or other data structure or None, optional
    Transformed data whose statistics we show. If not provided, it is loaded 
    from disk.
training_data_flag: bool, optional
    If True, transformed training data is loaded, otherwise transformed test 
    data is loaded.
```

## Sub-classes

In [43]:
#export
class SamplingComponent (Component):
    """
    Component that makes use of labels in transform method.
    
    When calling the transform method, one of the columns of the received data 
    is assumed to contain the ground-truth labels. This allows the transform 
    method to modify the number of observations, changing the number of rows in 
    the data and in the labels. See `PandasConverter` class in 
    `block_types.core.data_conversion`.
    """
    def __init__ (self,
                  estimator=None,
                  transform_uses_labels=True,
                  **kwargs):

        # the SamplingComponent over-rides the following parameters:
        super().__init__ (estimator=estimator,
                          transform_uses_labels=transform_uses_labels,
                          **kwargs)

In [44]:
show_doc (SamplingComponent, title_level=3)

<h3 id="SamplingComponent" class="doc_header"><code>class</code> <code>SamplingComponent</code><a href="" class="source_link" style="float:right">[source]</a></h3>

> <code>SamplingComponent</code>(**`estimator`**=*`None`*, **`transform_uses_labels`**=*`True`*, **\*\*`kwargs`**) :: [`Component`](/core.block_types.html#Component)

```
Component that makes use of labels in transform method.

When calling the transform method, one of the columns of the received data 
is assumed to contain the ground-truth labels. This allows the transform 
method to modify the number of observations, changing the number of rows in 
the data and in the labels. See `PandasConverter` class in 
`block_types.core.data_conversion`.
```

In [45]:
#export
class SklearnComponent (Component):
    """
    Component that saves estimator parameters in pickle format.
    
    Convenience subclass used when the results can be saved in 
    pickle format. See `SklearnIO` class in `core.utils`.
    """
    def __init__ (self,
                  estimator=None,
                  data_io=None,
                  transform_uses_labels=False,
                  **kwargs):

        if data_io is None:
            data_io = SklearnIO (**kwargs)
        
        super().__init__ (estimator=estimator,
                          data_io = data_io,
                          transform_uses_labels=False,
                          **kwargs)

# alias
PickleSaverComponent = SklearnComponent

In [46]:
show_doc (SklearnComponent, name = 'SklearnComponent', title_level=3)

<h3 id="SklearnComponent" class="doc_header"><code>class</code> <code>SklearnComponent</code><a href="" class="source_link" style="float:right">[source]</a></h3>

> <code>SklearnComponent</code>(**`estimator`**=*`None`*, **`data_io`**=*`None`*, **`transform_uses_labels`**=*`False`*, **\*\*`kwargs`**) :: [`Component`](/core.block_types.html#Component)

```
Component that saves estimator parameters in pickle format.

Convenience subclass used when the results can be saved in 
pickle format. See `SklearnIO` class in `core.utils`.
```

In [47]:
#export
class NoSaverComponent (Component):
    """Component that does not save any data."""
    def __init__ (self,
                  estimator=None,
                  data_io=None,
                  **kwargs):

        if data_io is None:
            data_io = NoSaverIO (**kwargs)
        
        super().__init__ (estimator=estimator,
                          data_io=data_io,
                          **kwargs)

In [48]:
show_doc (NoSaverComponent, name = 'SklearnComponent', title_level=3)

<h3 id="SklearnComponent" class="doc_header"><code>class</code> <code>SklearnComponent</code><a href="" class="source_link" style="float:right">[source]</a></h3>

> <code>SklearnComponent</code>(**`estimator`**=*`None`*, **`data_io`**=*`None`*, **\*\*`kwargs`**) :: [`Component`](/core.block_types.html#Component)

```
Component that does not save any data.
```

In [49]:
#export
class OneClassSklearnComponent (SklearnComponent):
    """Component that uses only normal data (labelled with 0) for fitting parameters."""
    def __init__ (self,
                  estimator=None,
                  **kwargs):
        super().__init__ (estimator=estimator,
                          **kwargs)

    def _fit (self, X, y=None):
        assert y is not None, 'y must be provided in OneClassSklearnComponent class'
        X = X[y==0]

        assert self.estimator is not None, 'estimator must be provided in OneClassSklearnComponent class'
        self.estimator.fit (X, y)

In [50]:
show_doc (OneClassSklearnComponent, name = 'OneClassSklearnComponent', title_level=3)

<h3 id="OneClassSklearnComponent" class="doc_header"><code>class</code> <code>OneClassSklearnComponent</code><a href="" class="source_link" style="float:right">[source]</a></h3>

> <code>OneClassSklearnComponent</code>(**`estimator`**=*`None`*, **\*\*`kwargs`**) :: [`SklearnComponent`](/core.block_types.html#SklearnComponent)

```
Component that uses only normal data (labelled with 0) for fitting parameters.
```

In [51]:
#export
class PandasComponent (Component):
    """
    Component that preserves the DataFrame format for incoming data and results.
    
    This component also writes results in parquet format, by default.
    See `PandasConverter` in `core.data_conversion` for details on the data 
    conversion performed.
    """
    def __init__ (self,
                  estimator=None,
                  data_converter=None,
                  data_io=None,
                  **kwargs):

        if data_converter is None:
            data_converter = PandasConverter (**kwargs)
        if data_io is None:
            data_io = PandasIO (**kwargs)

        super().__init__ (estimator=estimator,
                          data_converter=data_converter,
                          data_io=data_io,
                          **kwargs)

In [52]:
show_doc (PandasComponent, name='PandasComponent', title_level=3)

<h3 id="PandasComponent" class="doc_header"><code>class</code> <code>PandasComponent</code><a href="" class="source_link" style="float:right">[source]</a></h3>

> <code>PandasComponent</code>(**`estimator`**=*`None`*, **`data_converter`**=*`None`*, **`data_io`**=*`None`*, **\*\*`kwargs`**) :: [`Component`](/core.block_types.html#Component)

```
Component that preserves the DataFrame format for incoming data and results.

This component also writes results in parquet format, by default.
See `PandasConverter` in `core.data_conversion` for details on the data 
conversion performed.
```