# FunctionTransformer

In Python, a FunctionTransformer is a utility from the sklearn.preprocessing module in scikit-learn.                
It is used to apply a custom function to data for transformation, typically in a machine learning pipeline.

The FunctionTransformer takes a single callable as input and applies it to the whole input array `X` in one call (it is not invoked sample by sample).
This callable can be any Python function that accepts a single argument, such as a lambda function or a user-defined function.
The function should return the transformed data.

In [4]:
from sklearn.preprocessing import FunctionTransformer
import numpy as np

# Build a small 2x2 sample matrix to demonstrate the transformer
X = np.array([
    [1, 2],
    [3, 4],
])

# Wrap NumPy's log1p, i.e. log(1 + x), so it exposes the transformer interface
log_transform = FunctionTransformer(func=np.log1p)

# Run the wrapped function over the sample matrix
X_transformed = log_transform.transform(X)

# Show the log1p of every entry of X
print(X_transformed)

[[0.69314718 1.09861229]
 [1.38629436 1.60943791]]


In [6]:
# Practical use cases

# 1. Custom Feature Engineering

from sklearn.preprocessing import FunctionTransformer
import numpy as np

# Small 2x2 integer matrix used as the example input
X = np.array([
    [1, 2],
    [3, 4],
])

#define a custom feature engineering function
def holi(X):
    """Return ``X`` with its element-wise squares stacked on horizontally.

    For a 2-D input this appends one squared column per original column.
    """
    squared = X ** 2
    return np.hstack([X, squared])

# Wrap holi so it exposes the scikit-learn transformer interface
custom_transformer = FunctionTransformer(func=holi)

# Push the sample matrix through the wrapped function
X_transformed = custom_transformer.transform(X)

# Show the engineered feature matrix
print(X_transformed)

[[ 1  2  1  4]
 [ 3  4  9 16]]


In [7]:
# 2. Scaling and Normalization

from sklearn.preprocessing import FunctionTransformer
import numpy as np

# Small 2x2 integer matrix used as the example input
X = np.array([
    [1, 2],
    [3, 4],
])

# define a custom scaling function
def my_scaling(X):
    """Divide every entry of ``X`` by the maximum value returned by ``np.max(X)``."""
    largest = np.max(X)
    return X / largest

# Wrap my_scaling so it exposes the scikit-learn transformer interface
custom_transformer = FunctionTransformer(func=my_scaling)

# Push the sample matrix through the wrapped function
X_transformed = custom_transformer.transform(X)

# Show the rescaled matrix
print(X_transformed)

[[0.25 0.5 ]
 [0.75 1.  ]]


In [10]:
# 3. Data Cleaning

from sklearn.preprocessing import FunctionTransformer
import numpy as np

# Example matrix whose bottom-right entry is missing (NaN)
X = np.array([
    [1, 2],
    [3, np.nan],
])

# define a custom cleaning function
def my_cleaning(X):
    """Return a copy of ``X`` in which every NaN entry is replaced by 0.

    Parameters
    ----------
    X : array-like supporting ``.copy()`` and boolean-mask assignment
        Data that may contain NaN values (e.g. a NumPy array).

    Returns
    -------
    Same type as ``X``
        A new object with NaNs set to 0; the caller's ``X`` is left untouched.
    """
    # Work on a copy: writing into X directly would silently modify the
    # caller's data every time the transformer is applied.
    cleaned = X.copy()
    cleaned[np.isnan(cleaned)] = 0
    return cleaned

# Wrap my_cleaning so it exposes the scikit-learn transformer interface
custom_transformer = FunctionTransformer(func=my_cleaning)

# Run the matrix with the missing value through the wrapped function
X_transformed = custom_transformer.transform(X)

# Show the matrix with NaN replaced by 0
print(X_transformed)


[[1. 2.]
 [3. 0.]]


In [11]:
import numpy as np
import pandas as pd

In [12]:
# Load the placement data from a CSV file given by a relative path
df = pd.read_csv('placement.csv')

In [13]:
# Preview the first three rows
df.head(n=3)

Unnamed: 0,cgpa,resume_score,placed
0,8.14,6.52,1
1,6.17,5.17,0
2,8.27,8.86,1


In [14]:
# Feature matrix: every column except the 'placed' column
x = df.drop(columns='placed')

In [15]:
# Target vector: the 'placed' column on its own
y = df['placed']

In [16]:
from sklearn.preprocessing import FunctionTransformer


In [17]:
# Wrap NumPy's log1p, i.e. log(1 + x), so it exposes the transformer interface
log_transform = FunctionTransformer(func=np.log1p)

# Apply log1p to every value in the feature frame
x_transformed = log_transform.transform(x)

In [18]:
# Display the log1p-transformed features
x_transformed

Unnamed: 0,cgpa,resume_score
0,2.212660,2.017566
1,1.969906,1.819699
2,2.226783,2.288486
3,2.064328,2.112635
4,2.142416,2.116256
...,...,...
95,1.991976,1.998774
96,2.222459,2.170196
97,2.034706,2.172476
98,2.212660,1.891605


In [90]:
import numpy as np
import pandas as pd

In [91]:
# Load the covid toy data from a CSV file given by a relative path
# (this rebinds `df`, replacing whatever it referred to before)
df = pd.read_csv('covid_toy.csv')

In [92]:
# Preview the first three rows
df.head(n=3)

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,Male,103.0,Mild,Kolkata,No
1,27,Male,100.0,Mild,Delhi,Yes
2,42,Male,101.0,Mild,Delhi,No


In [93]:
# Inspect the dtype pandas assigned to each column
df.dtypes

age            int64
gender        object
fever        float64
cough         object
city          object
has_covid     object
dtype: object

In [94]:
# Print the column labels, one per line
for column_name in df.columns:
    print(column_name)

age
gender
fever
cough
city
has_covid


In [95]:
# Module-level buckets filled by fun(): column names grouped by dtype.
l1_int = []
l2_float = []
l3_obj = []


def fun(x):
    """Group the column names of ``x`` by dtype into the module-level buckets.

    Parameters
    ----------
    x : pandas.DataFrame
        Frame whose columns are classified.

    Returns
    -------
    tuple of list
        ``(l1_int, l2_float, l3_obj)``: the module-level lists, holding the
        names of the ``int64`` columns, the ``float64`` columns and all
        remaining columns respectively.
    """
    # Start from empty buckets so calling fun() more than once does not
    # append the same column names again. clear() keeps the list objects
    # themselves, so existing references to them stay valid.
    for bucket in (l1_int, l2_float, l3_obj):
        bucket.clear()

    # Classify the frame that was passed in rather than a global one.
    for name in x.columns:
        column_dtype = x[name].dtype
        if column_dtype == np.int64:
            l1_int.append(name)
        elif column_dtype == np.float64:
            l2_float.append(name)
        else:
            l3_obj.append(name)
    return l1_int, l2_float, l3_obj



from sklearn.preprocessing import FunctionTransformer

# Wrap fun so it can be invoked through the transformer interface
custom_transformer = FunctionTransformer(func=fun)

# Calling transform runs fun, which fills l1_int, l2_float and l3_obj
x_transformed = custom_transformer.transform(df)

# Report which column names landed in each dtype bucket
for label, bucket in (
    ("Integer data type columns name :", l1_int),
    ("Float data type columns name :", l2_float),
    ("object data type columns name :", l3_obj),
):
    print(label, bucket)

Integer data type columns name : ['age']
Float data type columns name : ['fever']
object data type columns name : ['gender', 'cough', 'city', 'has_covid']


In [96]:
from sklearn.preprocessing import LabelEncoder
# `lb` is bound up front so the name exists even if l3_obj is empty.
lb = LabelEncoder()

# Keep one fitted encoder per column. Refitting a single shared encoder would
# overwrite the learned classes on every iteration, leaving only the last
# column's mapping available for decoding the integer codes later.
label_encoders = {}
for i in l3_obj:
    lb = LabelEncoder()
    df[i] = lb.fit_transform(df[i])
    label_encoders[i] = lb

# Preview the first three rows after encoding
df.head(3)

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,1,103.0,0,2,0
1,27,1,100.0,0,1,1
2,42,1,101.0,0,1,0


In [97]:
# Count the missing values in each column
df.isna().sum()

age           0
gender        0
fever        10
cough         0
city          0
has_covid     0
dtype: int64

In [107]:
from sklearn.impute import SimpleImputer
# Impute only the columns that actually contain missing values.
# The fit/transform step must sit inside the `if`: placed after it, the step
# would run for every column and would fail with a NameError on a fresh kernel
# whenever the first column has no missing values (`si` not yet created).
for i in df.columns:
    if df[i].isnull().any():
        # SimpleImputer() with no arguments uses its default strategy ("mean")
        si = SimpleImputer()
        # Double brackets pass a one-column frame (2-D) to the imputer
        df[[i]] = si.fit_transform(df[[i]])
    

In [108]:
# Count the missing values in each column
df.isna().sum()

age          0
gender       0
fever        0
cough        0
city         0
has_covid    0
dtype: int64