## Imports

In [3]:
import pandas as pd
import numpy as np

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline

## Create data

In [4]:
# Toy frame for the demo: one string column ('label') and one integer column ('score').
data = dict(
    label=['dog', 'cat', 'catdog', 'dog', 'catdog'],
    score=[1, 2, 3, 4, 5],
)
df = pd.DataFrame(data=data, columns=['label', 'score'])
df

Unnamed: 0,label,score
0,dog,1
1,cat,2
2,catdog,3
3,dog,4
4,catdog,5


## Define non-numerical columns

In [7]:
def get_non_numerical_columns(df):
    """Return the names of the columns of ``df`` that are not numeric.

    A column counts as numeric when ``df.select_dtypes('number')`` keeps it.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are inspected.

    Returns
    -------
    list
        Column names not selected as numeric, in the order they appear in
        ``df.columns``.
    """
    numeric_names = list(df.select_dtypes('number').columns)
    remaining = []
    for name in list(df.columns):
        if name in numeric_names:
            continue
        remaining.append(name)
    return remaining

# Compute the non-numeric column names once; the pipeline's ColumnSelector
# below is constructed from this list.
non_numerics = get_non_numerical_columns(df)
print(non_numerics)

['label']


## Create custom transformer (fit and transform methods)

In [8]:
class ColumnSelector(BaseEstimator, TransformerMixin):
    """Transformer that keeps only a configured subset of columns.

    Parameters
    ----------
    columns
        Key used to index ``X`` in ``transform``; in this notebook it is a
        list of column names.
    """

    def __init__(self, columns):
        # Stored untouched under the same name as the constructor argument.
        self.columns = columns

    def fit(self, X, y=None):
        """Do nothing and return this instance; ``X`` and ``y`` are ignored."""
        return self

    def transform(self, X):
        """Return ``X`` indexed by ``self.columns``."""
        selected = X[self.columns]
        return selected

## Create categorical pipeline

In [9]:
# Single-step pipeline: keep only the non-numeric columns.
cat_pipeline = Pipeline(
    steps=[
        ('cat_selector', ColumnSelector(columns=non_numerics)),
    ]
)

## Fit pipeline

In [10]:
# ColumnSelector.fit just returns self, so nothing is learned from df here;
# the call is shown to follow the usual fit-then-transform workflow.
cat_pipeline.fit(df)

Pipeline(memory=None,
         steps=[('cat_selector', ColumnSelector(columns=['label']))],
         verbose=False)

## Transform pipeline

In [11]:
# The only step indexes df with the non_numerics list, so the result is df
# restricted to those columns (here just 'label').
cat_pipeline.transform(df)

Unnamed: 0,label
0,dog
1,cat
2,catdog
3,dog
4,catdog


## Source

* https://towardsdatascience.com/pipeline-columntransformer-and-featureunion-explained-f5491f815f