In [46]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
from sklearn.metrics import mean_squared_error

from sklearn.preprocessing import StandardScaler, PowerTransformer
from sklearn.compose import TransformedTargetRegressor
from sklearn.pipeline import FeatureUnion, Pipeline, make_pipeline
from sklearn.base import BaseEstimator, TransformerMixin

from sklearn.linear_model import LinearRegression

In [47]:
# Toy dataset with eight observations, written column by column:
# two predictors (X1, X2) and the target y.
df = pd.DataFrame({
    'X1': [1, 4, 1, 2, 3, 2, 4, 5],
    'X2': [16, 36, 16, 9, 36, 49, 25, 36],
    'y': [9, 16, 9, 8, 15, 16, 14, 17],
})

In [48]:
# Positional split: the first six rows are used for fitting and the
# remaining rows are held out for evaluation.
train, test = df.iloc[:6], df.iloc[6:]

# Separate the predictor columns from the target column `y` in each partition.
train_X, train_y = train.drop(columns='y'), train['y']
test_X, test_y = test.drop(columns='y'), test['y']

In [49]:
class CustomTargetTransformer_Ejercicio(BaseEstimator, TransformerMixin):
  """Stateless target transformer that works on the square root of the target.

  ``transform`` returns the element-wise square root of its input and
  ``inverse_transform`` returns the element-wise square, mapping values back
  to the original scale. Both methods print a marker line so the order in
  which they are called can be followed in the notebook output.
  """

  def fit(self, target, y=None):
    """Learn nothing and return ``self``.

    Parameters
    ----------
    target : array-like
        Values the transformer is fitted on; not inspected.
    y : ignored
        Accepted only so the signature follows the scikit-learn
        ``fit(X, y=None)`` convention; without it, callers that forward a
        second positional argument would raise ``TypeError``.

    Returns
    -------
    self
    """
    return self

  def transform(self, target):
    """Return the element-wise square root of ``target``.

    The input is not modified: ``np.sqrt`` builds a new object.
    Negative entries have no real square root and come back as NaN.
    """
    print('\n%%%%%%%%%%%%%%%custom_target_transform() called.\n')
    return np.sqrt(target)

  def inverse_transform(self, target):
    """Return the element-wise square of ``target``, undoing ``transform``.

    The input is not modified: ``np.square`` builds a new object.
    """
    print('\n%%%%%%%%%%%%%%%custom_inverse_target_transform() called.\n')
    return np.square(target)

In [50]:
class ExperimentalTransformer_Ejercicio(BaseEstimator, TransformerMixin):
  """Feature transformer that replaces one column by its square root.

  Every method prints a marker line so the order in which the constructor,
  ``fit`` and ``transform`` are invoked can be followed in the notebook output.
  """

  def __init__(self, feature_name):
    """Store the name of the column that ``transform`` will modify."""
    print('\n>>>>>>>init() ExperimentalTransformer_Ejercicio called.\n')
    self.feature_name = feature_name

  def fit(self, X, y = None):
    """Learn nothing from the data; only announce the call and return ``self``."""
    print('\n>>>>>>>fit() called.\n')
    return self

  def transform(self, X, y = None):
    """Return a copy of ``X`` whose ``feature_name`` column holds square roots.

    ``X`` is indexed by column name, so it is assumed to be a pandas
    DataFrame (or another container supporting ``X[name]`` and ``copy()``).
    """
    print('\n>>>>>>>transform() called.\n')
    column = self.feature_name
    transformed = X.copy()  # work on a copy so the caller's data is left untouched
    transformed[column] = np.sqrt(transformed[column])
    return transformed

In [51]:
# Feature transformation and target transformation combined in a single model.
print("Creación Pipeline_Ejercicio")
pipe = Pipeline([
    ('experimental_trans', ExperimentalTransformer_Ejercicio(feature_name='X2')),
    ('linear_model', LinearRegression()),
])

# The wrapper fits `pipe` on the transformed target and sends predictions
# through the transformer's inverse_transform before returning them.
model = TransformedTargetRegressor(
    regressor=pipe,
    transformer=CustomTargetTransformer_Ejercicio(),
    check_inverse=False,
)

print("fit pipeline 5 [fit Model]")
model.fit(train_X, train_y)
print("predict via pipeline 5 [Model]")
preds = model.predict(test_X)
# Predictions are on the original target scale, so they should land close to
# the held-out targets (14 and 17), not to their squares.
print(f"\n{preds}")
print(f"RMSE: {np.sqrt(mean_squared_error(y_true=test_y, y_pred=preds))}\n")

Creación Pipeline_Ejercicio

>>>>>>>init() ExperimentalTransformer_Ejercicio called.

fit pipeline 5 [fit Model]

%%%%%%%%%%%%%%%custom_target_transform() called.


>>>>>>>init() ExperimentalTransformer_Ejercicio called.


>>>>>>>fit() called.


>>>>>>>transform() called.

predict via pipeline 5 [Model]

>>>>>>>transform() called.


%%%%%%%%%%%%%%%custom_inverse_target_transform() called.


[13.74406175 17.1432055 ]
RMSE: 0.2073791245738714



Ejemplo con el escalador de la librería scikit-learn

In [56]:
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.compose import TransformedTargetRegressor

# Synthetic data: two integer features drawn from [0, 100) with a fixed seed,
# and a target defined as their product.
np.random.seed(0)
data = {
    'x1': np.random.randint(low=0, high=100, size=15),
    'x2': np.random.randint(low=0, high=100, size=15),
}
df = pd.DataFrame(data).assign(y=lambda frame: frame['x1'] * frame['x2'])

X = df[['x1', 'x2']]
y = df['y']

# Hold out 20% of the rows; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Features are standardised, then rescaled by MinMaxScaler, then fed to a
# linear regression.
Pipe_2 = Pipeline([
    ('StandardScaler', StandardScaler()),
    ('MinMaxScaler', MinMaxScaler()),
    ('LinearRegression', LinearRegression()),
])

# NOTE: no `transformer` or `func` is given, so (per the scikit-learn docs)
# the wrapper applies an identity transform and the target is left unchanged.
model = TransformedTargetRegressor(regressor=Pipe_2, check_inverse=False)

print("Fit pipeline [fit Model]")
model.fit(X_train, y_train)
print("Predict via pipeline [Model]")
preds = model.predict(X_test)
print(f"\n{preds}")
print(f"RMSE: {np.sqrt(mean_squared_error(y_true=y_test, y_pred=preds))}\n")
 


Fit pipeline [fit Model]
Predict via pipeline [Model]

[5944.76222675 3060.68832026 2852.84989811]
RMSE: 773.2515731830867



In [57]:
from sklearn.linear_model import Ridge
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error
# Ridge regression on the raw features, wrapped in the same target-regressor
# shell (no target transformer is supplied here).
ridge_model = TransformedTargetRegressor(regressor=Ridge(), check_inverse=False)
ridge_preds = ridge_model.fit(X_train, y_train).predict(X_test)
ridge_rmse = np.sqrt(mean_squared_error(y_true=y_test, y_pred=ridge_preds))
print(f"RMSE con Ridge: {ridge_rmse}")

# Degree-2 polynomial regression: the feature expansion is fitted on the
# training split only and then re-applied to the test split.
poly_features = PolynomialFeatures(degree=2)
X_train_poly = poly_features.fit_transform(X_train)
X_test_poly = poly_features.transform(X_test)
poly_model = TransformedTargetRegressor(regressor=LinearRegression(), check_inverse=False)
poly_preds = poly_model.fit(X_train_poly, y_train).predict(X_test_poly)
poly_rmse = np.sqrt(mean_squared_error(y_true=y_test, y_pred=poly_preds))
print(f"RMSE con regresión polinómica: {poly_rmse}")

RMSE con Ridge: 773.3035482721302
RMSE con regresión polinómica: 1.726643214759939e-12
