# Function Transformer in machine learning

# The FunctionTransformer is a tool in scikit-learn, a popular Python library for machine learning, that lets you apply a function of your choice to the input data. It is useful for performing custom transformations of the input data in a machine learning pipeline.

##### The FunctionTransformer takes a single callable as input and forwards the whole input array X to it in one call (the function is not invoked sample by sample). The callable can be any Python function that accepts the data as its argument, such as a lambda function or a user-defined function, and it should return the transformed data.

In [9]:
from sklearn.preprocessing import FunctionTransformer
import numpy as np

# Create a small 2x2 dataset
X = np.array([[1,2], [3,4]])

# Wrap np.log1p (element-wise log(1 + x)) in a FunctionTransformer
log_transform = FunctionTransformer(np.log1p)

# Apply the transformation to the dataset; transform() is called directly,
# without a prior fit()
X_transformed = log_transform.transform(X)

# View the transformed data
print(X_transformed)

[[0.69314718 1.09861229]
 [1.38629436 1.60943791]]


In [13]:
# 1. Custom feature engineering

from sklearn.preprocessing import FunctionTransformer
import numpy as np

# Create a small 2x2 integer dataset
X = np.array([[1,2], [3,4]])

# define a custom feature engineering function
def squ(X):
    """Return ``X`` with its element-wise squares appended.

    The squares are stacked horizontally after the original values, so a
    2-D input of shape (n, m) yields an output of shape (n, 2 * m).
    """
    squared = X ** 2
    return np.hstack([X, squared])

# create a FunctionTransformer that applies the custom function squ
coustom_transformer = FunctionTransformer(squ)

# apply the transformer to the input data
X_transformed = coustom_transformer.transform(X)

# view the transformed data
print(X_transformed)

[[ 1  2  1  4]
 [ 3  4  9 16]]


In [17]:
# 2. Scaling function

from sklearn.preprocessing import FunctionTransformer
import numpy as np

# create a small 2x2 integer dataset
X = np.array([[1,2], [3,4]])

# define a custom scaling function
def my_scaling(X):
    """Scale ``X`` by dividing every element by the largest value in ``X``."""
    largest = np.max(X)
    return X / largest

# create a FunctionTransformer that applies my_scaling
# (division of every element by the maximum of the whole array)
custom_transformer = FunctionTransformer(my_scaling)

# apply the transformer to the input data
X_transformed = custom_transformer.transform(X)

# view the transformed data
print(X_transformed)

[[0.25 0.5 ]
 [0.75 1.  ]]


In [24]:
# 3. Data cleaning

from sklearn.preprocessing import FunctionTransformer
import numpy as np

# create a 2x2 dataset whose last entry is missing (np.nan)
X = np.array([[1,2], [3,np.nan]])

# define a custom cleaning function
def my_cleaning(X):
    """Return a copy of ``X`` in which every NaN entry is replaced by 0.

    The replacement is done on a copy, so the array passed in by the caller
    keeps its NaN values. A transformer function should not modify its input
    as a side effect, otherwise the "raw" data silently changes after the
    first call to ``transform``.

    Parameters
    ----------
    X : array-like with a ``copy()`` method and boolean-mask assignment
        (e.g. a float ``numpy.ndarray``).

    Returns
    -------
    Same type as ``X``, with NaNs set to 0.
    """
    cleaned = X.copy()
    cleaned[np.isnan(cleaned)] = 0
    return cleaned

# create a FunctionTransformer that applies my_cleaning (NaN -> 0)
custom_transformer = FunctionTransformer(my_cleaning)

# apply the transformer to the input data
X_transformed = custom_transformer.transform(X)

# view the transformed data
print(X_transformed)

[[1. 2.]
 [3. 0.]]


In [95]:
import numpy as np
import pandas as pd

In [97]:
df = pd.read_csv('placement.csv')

In [99]:
df.head()

Unnamed: 0,cgpa,resume_score,placed
0,8.14,6.52,1
1,6.17,5.17,0
2,8.27,8.86,1
3,6.88,7.27,1
4,7.52,7.3,1


In [101]:
# Features: every column except the target column 'placed'
x=df.drop(columns = ['placed'])
# Target: the 'placed' column
y=df['placed']

In [103]:
from sklearn.model_selection import train_test_split

In [105]:
# Hold out 20% of the rows as a test set; random_state=42 fixes the shuffle
# so the split is reproducible
x_train , x_test , y_train , y_test = train_test_split(x,y,test_size=0.2,random_state=42)

In [107]:
from sklearn.preprocessing import FunctionTransformer

In [109]:
# Wrap np.log1p (element-wise log(1 + x)) in a FunctionTransformer
log_transform = FunctionTransformer(np.log1p)

# Apply the transformer to the dataset.
# NOTE(review): this transforms the full feature frame `x`, not the
# x_train / x_test split created earlier. log1p has no fitted state, so the
# values would be the same either way, but the split goes unused here.
x_transformed = log_transform.transform(x)

In [111]:
x_transformed

Unnamed: 0,cgpa,resume_score
0,2.212660,2.017566
1,1.969906,1.819699
2,2.226783,2.288486
3,2.064328,2.112635
4,2.142416,2.116256
...,...,...
95,1.991976,1.998774
96,2.222459,2.170196
97,2.034706,2.172476
98,2.212660,1.891605


In [140]:
df=pd.read_csv('covid_toy.csv')

In [142]:
df.head()

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,Male,103.0,Mild,Kolkata,No
1,27,Male,100.0,Mild,Delhi,Yes
2,42,Male,101.0,Mild,Delhi,No
3,31,Female,98.0,Mild,Kolkata,No
4,65,Female,101.0,Mild,Mumbai,No


In [144]:
from sklearn.preprocessing import LabelEncoder

In [146]:
lb = LabelEncoder()

In [148]:
# Integer-encode each categorical column in place.
# NOTE: the single encoder `lb` is re-fitted for every column, so once this
# cell has run it only retains the classes of the last column ('has_covid').
for column in ('gender', 'cough', 'city', 'has_covid'):
    df[column] = lb.fit_transform(df[column])

In [150]:
df.head()

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,1,103.0,0,2,0
1,27,1,100.0,0,1,1
2,42,1,101.0,0,1,0
3,31,0,98.0,0,2,0
4,65,0,101.0,0,3,0


In [152]:
# Features: every column except the target column 'has_covid'
x = df.drop(columns = ['has_covid'])
# Target: the 'has_covid' column
y=df['has_covid']

In [154]:
from sklearn.model_selection import train_test_split

In [156]:
# Hold out 20% of the rows as a test set; random_state=42 fixes the shuffle
# so the split is reproducible
x_train , x_test , y_train , y_test = train_test_split(x,y,test_size=0.2,random_state=42)

In [158]:
from sklearn.preprocessing import FunctionTransformer

In [160]:
# Wrap np.log1p (element-wise log(1 + x)) in a FunctionTransformer
log_transform = FunctionTransformer(np.log1p)

# Apply the transformer to the dataset.
# NOTE(review): `x` still contains the label-encoded columns 'gender',
# 'cough' and 'city', so log1p is also applied to their integer category
# codes, which is probably not meaningful -- confirm whether only the numeric
# columns ('age', 'fever') were meant to be transformed.
# NOTE(review): the full frame `x` is transformed here, not the
# x_train / x_test split created earlier.
x_transformed = log_transform.transform(x)

In [162]:
x_transformed

Unnamed: 0,age,gender,fever,cough,city
0,4.110874,0.693147,4.644391,0.000000,1.098612
1,3.332205,0.693147,4.615121,0.000000,0.693147
2,3.761200,0.693147,4.624973,0.000000,0.693147
3,3.465736,0.000000,4.595120,0.000000,1.098612
4,4.189655,0.000000,4.624973,0.000000,1.386294
...,...,...,...,...,...
95,2.564949,0.000000,4.653960,0.000000,0.000000
96,3.951244,0.000000,4.624973,0.693147,1.098612
97,3.044522,0.000000,4.624973,0.000000,0.000000
98,1.791759,0.000000,4.595120,0.693147,1.386294
