In [2]:

from sklearn.preprocessing import FunctionTransformer
import numpy as np

# build a small 2x2 sample matrix
X = np.array([
    [1, 2],
    [3, 4],
])

# wrap NumPy's log1p so it can be used through the transformer interface
log_transform = FunctionTransformer(func=np.log1p)

# run the sample matrix through the wrapped function
X_transformed = log_transform.transform(X)

# show the element-wise result
print(X_transformed)

[[0.69314718 1.09861229]
 [1.38629436 1.60943791]]


In [3]:
# Practical use cases

# 1. Custom Feature Engineering 

from sklearn.preprocessing import FunctionTransformer
import numpy as np

# build a small 2x2 sample matrix
X = np.array([
    [1, 2],
    [3, 4],
])

# define a custom feature engineering function
def my_feature_engineering(X):
    """Append the element-wise squares of ``X`` to ``X`` itself.

    The squares are stacked after the original values with ``np.hstack``,
    so a 2-D input with ``k`` columns yields ``2 * k`` columns: the
    original ones followed by their squares.
    """
    squared = X ** 2
    return np.hstack([X, squared])

# wrap the custom function so it is exposed through the transformer interface
custom_transformer = FunctionTransformer(func=my_feature_engineering)

# run the input data through the wrapped function
X_transformed = custom_transformer.transform(X)

# show the transformed data
print(X_transformed)


[[ 1  2  1  4]
 [ 3  4  9 16]]


In [4]:
# 2. Scaling And Normalization 

from sklearn.preprocessing import FunctionTransformer
import numpy as np

# build a small 2x2 sample matrix
X = np.array([
    [1, 2],
    [3, 4],
])

# define a custom scaling function
def my_scaling(X):
    """Divide every entry of ``X`` by the largest entry of ``X``.

    The maximum is taken over the whole input (no axis is given), so a
    single scalar divisor is used for all entries rather than one per column.
    """
    global_max = np.max(X)
    return X / global_max

# wrap the custom function so it is exposed through the transformer interface
custom_transformer = FunctionTransformer(func=my_scaling)

# run the input data through the wrapped function
X_transformed = custom_transformer.transform(X)

# show the transformed data
print(X_transformed)


[[0.25 0.5 ]
 [0.75 1.  ]]


In [6]:
# 3. Data Cleaning 

from sklearn.preprocessing import FunctionTransformer
import numpy as np

# build a small 2x2 sample matrix that contains one missing (NaN) entry
X = np.array([
    [1, 2],
    [3, np.nan],
])

# define a custom cleaning function
def my_cleaning(X):
    """Return a copy of ``X`` in which every NaN entry is replaced by 0.

    Parameters
    ----------
    X : array-like
        Numeric data that supports ``.copy()`` and boolean-mask assignment
        (for example a NumPy array).

    Returns
    -------
    Same type as ``X``
        A new object holding the cleaned values; ``X`` itself is not modified.
    """
    # Work on a copy: assigning into the argument would silently overwrite
    # the caller's raw data and make re-running the cell non-idempotent.
    cleaned = X.copy()
    cleaned[np.isnan(cleaned)] = 0
    return cleaned

# wrap the custom function so it is exposed through the transformer interface
custom_transformer = FunctionTransformer(func=my_cleaning)

# run the input data through the wrapped function
X_transformed = custom_transformer.transform(X)

# show the transformed data
print(X_transformed)


[[1. 2.]
 [3. 0.]]
