In [1]:
from sklearn.base import BaseEstimator,TransformerMixin

class DemoTransformer(BaseEstimator, TransformerMixin):
    """Minimal pass-through transformer showing the fit/transform skeleton.

    It learns nothing during ``fit`` and hands its input back unchanged
    from ``transform``.
    """

    def __init__(self):
        """Take no hyper-parameters."""

    def fit(self, X, y=None):
        """Do nothing and return ``self``; ``X`` and ``y`` are ignored."""
        return self

    def transform(self, X):
        """Return ``X`` itself (the same object, not a copy)."""
        return X
    

In [5]:
# Numerical imputation with the mean:
# missing values in each selected column are replaced with that column's mean.
class MeanImputer(BaseEstimator, TransformerMixin):
    """Replace missing values in selected columns with the column mean.

    Parameters
    ----------
    variables : iterable of column labels, or None
        Columns to impute. When None, every numeric column of the frame
        passed to ``fit`` is imputed.

    Attributes
    ----------
    mean_dict : dict
        Set by ``fit``. Maps each imputed column label to the mean of that
        column in the frame given to ``fit``.
    """

    def __init__(self, variables=None):
        # Stored as given; the None default is resolved in fit().
        self.variables = variables

    def fit(self, X, y=None):
        """Compute and store the mean of every column to impute.

        Parameters
        ----------
        X : pandas.DataFrame
            Frame holding the columns named in ``variables``.
        y : ignored
            Accepted but unused.

        Returns
        -------
        self
        """
        if self.variables is None:
            # Previously this default raised TypeError when iterated;
            # fall back to all numeric columns instead.
            columns = list(X.select_dtypes(include="number").columns)
        else:
            columns = list(self.variables)
        self.mean_dict = {col: X[col].mean() for col in columns}
        return self

    def transform(self, X):
        """Return a copy of ``X`` with NaNs replaced by the stored means.

        Raises AttributeError if called before ``fit`` (``mean_dict`` is
        not set yet). The input frame is not modified.
        """
        X = X.copy()
        for col, col_mean in self.mean_dict.items():
            # Assign the result back instead of `X[col].fillna(..., inplace=True)`:
            # the in-place call acts on a temporary column selection, which
            # warns on pandas 2.x and leaves X unchanged under Copy-on-Write.
            X[col] = X[col].fillna(col_mean)
        return X

In [3]:
# Let's create a small demo dataset with missing values to try the imputer on.
import pandas as pd
import numpy as np


# Fixed seed so the random frame below is the same on every run.
np.random.seed(1)

# Draw integers in [0, 100) but store them as float64 right away: NaN cannot
# live in an integer column, and assigning it into one relies on an implicit
# upcast that newer pandas versions deprecate.
df = pd.DataFrame(
    np.random.randint(0, 100, (10, 2)),
    columns=['A', 'B'],
).astype(float)

# Knock out two cells per column to have something to impute.
df.iloc[[1, 5], 0] = np.nan  # column A, rows 1 and 5
df.iloc[[2, 3], 1] = np.nan  # column B, rows 2 and 3
df

Unnamed: 0,A,B
0,37.0,12.0
1,,9.0
2,75.0,
3,79.0,
4,16.0,1.0
5,,71.0
6,6.0,25.0
7,50.0,20.0
8,18.0,84.0
9,11.0,28.0


In [7]:
# Configure the imputer to handle both columns of the demo frame.
mean_imputer = MeanImputer(variables=['A','B'])

In [8]:
# Compute and store the mean of columns A and B from df.
mean_imputer.fit(df)

In [9]:
# transform() works on a copy, so df keeps its NaNs; the displayed result has them filled.
mean_imputer.transform(df)

Unnamed: 0,A,B
0,37.0,12.0
1,36.5,9.0
2,75.0,31.25
3,79.0,31.25
4,16.0,1.0
5,36.5,71.0
6,6.0,25.0
7,50.0,20.0
8,18.0,84.0
9,11.0,28.0
