In [106]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
from sklearn.metrics import mean_squared_error

from sklearn.preprocessing import StandardScaler, PowerTransformer
from sklearn.compose import TransformedTargetRegressor
from sklearn.pipeline import FeatureUnion, Pipeline, make_pipeline
from sklearn.base import BaseEstimator, TransformerMixin

from sklearn.linear_model import LinearRegression

import joblib

In [18]:
df = pd.DataFrame(columns=['X1', 'X2', 'y'], data=[
                                                   [1,16,9],
                                                   [4,36,16],
                                                   [1,16,9],
                                                   [2,9,8],
                                                   [3,36,15],
                                                   [2,49,16],
                                                   [4,25,14],
                                                   [5,36,17]
])

### y = X1 + 2 * sqrt(X2)

train = df.iloc[:6]
test = df.iloc[6:]

train_X = train.drop('y', axis=1)
train_y = train.y

test_X = test.drop('y', axis=1)
test_y = test.y


In [19]:
df

Unnamed: 0,X1,X2,y
0,1,16,9
1,4,36,16
2,1,16,9
3,2,9,8
4,3,36,15
5,2,49,16
6,4,25,14
7,5,36,17


In [20]:
train_X

Unnamed: 0,X1,X2
0,1,16
1,4,36
2,1,16
3,2,9
4,3,36
5,2,49


In [21]:
test_X

Unnamed: 0,X1,X2
6,4,25
7,5,36


In [22]:
model = LinearRegression()

In [23]:
model.fit(train_X,train_y)

LinearRegression()

In [24]:
model.predict(train_X)

array([ 8.77758885, 15.87213059,  8.77758885,  8.4699881 , 14.81091651,
       16.29178711])

In [25]:
mean_squared_error(train_y,model.predict(train_X))

0.07617752082979069

In [26]:
model.predict(test_X)

mean_squared_error(test_y,model.predict(test_X))

0.041104070498024704

In [45]:
pipe = Pipeline([('model',LinearRegression())])

In [46]:
pipe.fit(train_X, train_y)

Pipeline(steps=[('model', LinearRegression())])

In [47]:
pipe.predict(train_X)

array([ 8.77758885, 15.87213059,  8.77758885,  8.4699881 , 14.81091651,
       16.29178711])

In [27]:
def transform_col(df,col):
    df_ = df.copy()
    df_[col] = np.sqrt(df_[col])
    return df_

In [30]:
trans_train_X  = transform_col(train_X, 'X2')
trans_test_X  = transform_col(test_X, 'X2')


In [37]:
model = LinearRegression()

In [38]:
model.fit(trans_train_X,train_y)

LinearRegression()

In [39]:
model.predict(train_X)

array([ 33.,  76.,  33.,  20.,  75., 100.])

In [40]:
mean_squared_error(train_y,model.predict(trans_train_X))

4.733165431326071e-30

In [41]:
model.predict(trans_test_X)

mean_squared_error(test_y,model.predict(trans_test_X))

1.262177448353619e-29

In [100]:

# references: 
# https://towardsdatascience.com/custom-transformers-and-ml-data-pipelines-with-python-20ea2a7adb65
# https://machinelearningmastery.com/how-to-transform-target-variables-for-regression-with-scikit-learn/
# http://zacstewart.com/2014/08/05/pipelines-of-featureunions-of-pipelines.html
# https://stackoverflow.com/questions/43308042/transformer-initialize-twice-in-pipeline

class TransformerColumn(BaseEstimator, TransformerMixin):
    def __init__(self,col_name1, col_name2):
        print("Im a constructor")
        self.col_name1 = col_name1
        self.col_name2 = col_name2
        
    def fit(self,X,y):
        print("I am a fit function")
        return self
    
    def transform(self, X):
        print("I am in transform")
        X_ = X.copy()
        X_[self.col_name1] = np.sqrt(X_[self.col_name1])
        X_[self.col_name2] = np.log(X_[self.col_name2])
        return X_
        
        

In [108]:
pipe = Pipeline([('tranform',TransformerColumn('X2','X1')),
                 ('model',LinearRegression())])

In [109]:
pipe.fit(train_X,train_y)

Pipeline(steps=[('tranform', StandardScaler()), ('model', LinearRegression())])

In [110]:
pipe.predict(train_X)

array([ 8.77758885, 15.87213059,  8.77758885,  8.4699881 , 14.81091651,
       16.29178711])

In [111]:
pipe.predict(test_X)

array([13.72113586, 16.93334467])

In [112]:
joblib.dump(pipe, 'pipe.pkl')

['pipe.pkl']

In [51]:
class Test():
    def __init__(self):
        print("I am a constructor")
        
    def fit(self,a,b):
        print("Addition of ",a," and ",b," = ",a+b)
        
    
    
        

In [52]:
a = Test()

I am a constructor


In [53]:
a.fit(3,5)

Addition of  3  and  5  =  8


In [57]:
def fit(a,b):
    print("Addition of ",a," and ",b," = ",a+b)

In [58]:
fit(4,6)

Addition of  4  and  6  =  10


In [59]:
Test().fit(3,5)

I am a constructor
Addition of  3  and  5  =  8
