<a href="https://colab.research.google.com/github/Ans365332/6may-file-example/blob/main/Function_Transformer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# What is a function transformer in machine learning?

# -> FunctionTransformer is a tool in scikit-learn, a popular Python library for machine learning, that allows you to apply a specified function to the input data. It is useful for performing custom transformations of input data in a machine learning pipeline.


In [2]:
from sklearn.preprocessing import FunctionTransformer
import numpy as np

# Small 2x2 sample array to push through the transformer.
x = np.array([
    [1, 2],
    [3, 4],
])

# Wrap np.log1p so it can be called through the transformer interface.
log_transform = FunctionTransformer(func=np.log1p)

# Run the wrapped function over the sample array.
x_transformed = log_transform.transform(x)

# Show the result.
print(x_transformed)

[[0.69314718 1.09861229]
 [1.38629436 1.60943791]]


In [3]:
# Practical use cases

# 1. Custom Feature Engineering

from sklearn.preprocessing import FunctionTransformer
import numpy as np

# Sample 2x2 dataset for the feature-engineering example.
X = np.array([
    [1, 2],
    [3, 4],
])


def squ(X):
    """Return X with its element-wise squares stacked on horizontally.

    The result is np.hstack of X followed by X ** 2.
    """
    squared = X ** 2
    return np.hstack((X, squared))

# Wrap squ so it can be called through the transformer interface.
custom_transformer = FunctionTransformer(func=squ)

# Run the dataset through the wrapped function.
x_transformed = custom_transformer.transform(X)

# Show the result.
print(x_transformed)


[[ 1  2  1  4]
 [ 3  4  9 16]]


In [4]:
# 2. Custom Scaling

from sklearn.preprocessing import FunctionTransformer
import numpy as np

# Sample 2x2 dataset for the scaling example.
X = np.array([
    [1, 2],
    [3, 4],
])


def my_scaling(X):
    """Divide X by np.max(X).

    np.max is called without an axis, so for an array the divisor is the
    single largest value in the whole input.
    """
    largest = np.max(X)
    return X / largest


# Wrap my_scaling so it can be called through the transformer interface.
custom_transformer = FunctionTransformer(func=my_scaling)

# Run the dataset through the wrapped function.
x_transformed = custom_transformer.transform(X)

# Show the result.
print(x_transformed)


[[0.25 0.5 ]
 [0.75 1.  ]]


In [6]:
# 3. Data Cleaning

from sklearn.preprocessing import FunctionTransformer
import numpy as np

# create a dataset with missing values
X = np.array([[1, 2], [3, np.nan]])


# define a custom cleaning function
def my_cleaning(X):
    """Return a copy of X in which every NaN entry is replaced by 0.

    Parameters
    ----------
    X : array-like supporting ``.copy()`` and boolean-mask assignment
        (for example a NumPy array).

    Returns
    -------
    A new object of the same kind as X with NaNs set to 0.

    The input itself is never modified. Assigning into X directly would
    overwrite the caller's data, so the raw values (and the fact that they
    were missing) would be lost after the first call.
    """
    cleaned = X.copy()
    cleaned[np.isnan(cleaned)] = 0
    return cleaned

# Wrap my_cleaning so it can be called through the transformer interface.
custom_transformer = FunctionTransformer(func=my_cleaning)

# Run the dataset through the wrapped function.
x_transformed = custom_transformer.transform(X)

# Show the result.
print(x_transformed)


[[1. 2.]
 [3. 0.]]


# On a real dataset

In [9]:
import numpy as np
import pandas as pd

In [10]:
# Load the placement CSV and preview its first rows.
placement_csv_path = '/content/newplacementdata - newplacementdata.csv'
df = pd.read_csv(placement_csv_path)
df.head()

Unnamed: 0,cgpa,placement_exam_marks,placed
0,7.19,26,1
1,7.46,38,1
2,7.54,40,1
3,6.42,8,1
4,7.23,17,0


In [11]:
# Split the frame: 'placed' is the target, every other column is a feature.
target_column = 'placed'
y = df[target_column]
x = df.drop(columns=[target_column])


In [12]:
from sklearn.preprocessing import FunctionTransformer

In [13]:
# Wrap np.log1p as a transformer, then run it over the feature frame.
log_transform = FunctionTransformer(func=np.log1p)
x_transformed = log_transform.transform(x)



In [14]:
# Display the transformed features as the cell output.
x_transformed

Unnamed: 0,cgpa,placement_exam_marks
0,2.102914,3.295837
1,2.135349,3.663562
2,2.144761,3.713572
3,2.004179,2.197225
4,2.107786,2.890372
...,...,...
995,2.289500,3.806662
996,2.314514,4.189655
997,1.773256,3.555348
998,2.263844,3.850148


In [25]:
# 2. Insurance dataset: load the CSV and preview its first rows.
insurance_csv_path = '/content/insurance - insurance (1).csv'
df = pd.read_csv(insurance_csv_path)
df.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [26]:
# Encode the categorical columns as integers.
#
# Series.map with a plain dict turns every value that is not a key into NaN.
# Because this cell writes back into df, running it a second time would look
# up the already-encoded integers, find no match, and wipe all three columns.
# Falling back to the current value makes the cell safe to re-run.
category_codes = {
    'sex': {'male': 1, 'female': 0},
    'smoker': {'yes': 1, 'no': 0},
    'region': {'southwest': 1, 'southeast': 2, 'northwest': 3, 'northeast': 4},
}
for column, codes in category_codes.items():
    # codes=codes binds this column's mapping at definition time.
    df[column] = df[column].map(lambda value, codes=codes: codes.get(value, value))

In [27]:
# Split the frame: 'charges' is the target, every other column is a feature.
target_column = 'charges'
y = df[target_column]
x = df.drop(columns=[target_column])

In [28]:
# Wrap np.log1p as a transformer, then run it over the feature frame.
log_transform = FunctionTransformer(func=np.log1p)
x_transformed = log_transform.transform(x)

In [29]:
# Display the transformed features as the cell output.
x_transformed

Unnamed: 0,age,sex,bmi,children,smoker,region
0,2.995732,0.000000,3.363842,0.000000,0.693147,0.693147
1,2.944439,0.693147,3.548755,0.693147,0.000000,1.098612
2,3.367296,0.693147,3.526361,1.386294,0.000000,1.098612
3,3.526361,0.693147,3.165686,0.000000,0.000000,1.386294
4,3.496508,0.693147,3.397189,0.000000,0.000000,1.386294
...,...,...,...,...,...,...
1333,3.931826,0.693147,3.464798,1.386294,0.000000,1.386294
1334,2.944439,0.000000,3.494080,0.000000,0.000000,1.609438
1335,2.944439,0.000000,3.633631,0.000000,0.000000,1.098612
1336,3.091042,0.000000,3.288402,0.000000,0.000000,0.693147
