In [11]:
'''
Q. What Changed in This Solution?
A. We’ll use a custom wrapper to extract feature names dynamically inside the pipeline, ensuring the preprocessor is only fitted once:
    a. Dynamic Feature Name Extraction Inside the Pipeline:
        - The FeatureNameExtractor step wraps the ColumnTransformer and automatically extracts feature names when the pipeline is fitted.
    b. No Manual Pre-Fit:
        - You no longer need to fit the preprocessor outside the pipeline. Everything happens during pipeline.fit().
    c. Portability:
        - The pipeline is now fully self-contained and can be reused without manual preprocessing steps.'''
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.linear_model import LinearRegression
import pandas as pd
import numpy as np

# Sample dataset
# Toy campaign table: two categorical descriptors, one numeric spend column,
# and the regression target (`free_trials`).
data = pd.DataFrame(dict(
    month=['January', 'February', 'January', 'March'],
    campaign_type=['A', 'B', 'A', 'C'],
    cost=[500, 300, 700, 200],
    free_trials=[50, 30, 60, 25],
))

# Column groups consumed by the preprocessing step
numerical_columns = ['cost']                      # scaled
categorical_columns = ['month', 'campaign_type']  # one-hot encoded

# Custom transformer to extract feature names
class FeatureNameExtractor(BaseEstimator, TransformerMixin):
    """Wrap a transformer and record its output feature names when fitted.

    The wrapped ``transformer`` is fitted in place (it is not cloned), so any
    outside reference to it is fitted as well.

    Parameters
    ----------
    transformer : transformer object
        Must provide ``fit``, ``transform`` and ``get_feature_names_out``
        (for example a ``ColumnTransformer``).

    Attributes
    ----------
    feature_names : array-like or None
        Result of ``transformer.get_feature_names_out()`` captured in ``fit``;
        ``None`` until ``fit`` has been called.
    """

    def __init__(self, transformer):
        self.transformer = transformer
        # Initialised here so the attribute can be read before fitting.
        self.feature_names = None

    def fit(self, X, y=None):
        """Fit the wrapped transformer on ``X`` and store its output names."""
        print("featurename fitting")
        self.transformer.fit(X, y)
        self.feature_names = self.transformer.get_feature_names_out()
        return self

    def transform(self, X):
        """Return ``self.transformer.transform(X)`` unchanged."""
        print("featurename transforming")
        return self.transformer.transform(X)

    def get_feature_names_out(self, input_features=None):
        """Delegate to the wrapped transformer's ``get_feature_names_out``.

        Without this method the wrapper hides the names from callers that ask
        each step for them in turn (e.g. ``Pipeline.get_feature_names_out``).
        """
        return self.transformer.get_feature_names_out(input_features)

# Preprocessor to handle categorical and numerical columns
preprocessor = FeatureNameExtractor(
    transformer=ColumnTransformer(
        # Columns not listed below are forwarded untouched.
        remainder='passthrough',
        transformers=[
            # Dense one-hot encoding; the first category of each column is dropped.
            ('cat', OneHotEncoder(sparse_output=False, drop='first'), categorical_columns),
            # Standardise the numeric columns.
            ('num', StandardScaler(), numerical_columns),
        ],
    )
)

# Custom transformer for interaction terms
class InteractionFeatures(BaseEstimator, TransformerMixin):
    """Append pairwise product columns to a 2-D feature matrix.

    For every index in ``numerical_columns`` and every index in
    ``interaction_names`` the element-wise product of the two columns is
    appended to the right of the input (outer loop: ``numerical_columns``,
    inner loop: ``interaction_names``).

    Parameters
    ----------
    numerical_columns : sequence of int
        Positional column indices used as the first factor.
    interaction_names : sequence of int
        Positional column indices used as the second factor (despite the
        parameter name these are indices, since they are used in ``X[:, i]``).
    """

    def __init__(self, numerical_columns, interaction_names):
        self.numerical_columns = numerical_columns
        self.interaction_names = interaction_names

    def fit(self, X, y=None):
        """Stateless: nothing is learned from ``X``."""
        print("interactinction fitting")
        return self

    def transform(self, X):
        """Return ``X`` with the interaction columns stacked on the right.

        If either index list is empty there is nothing to append and the
        input is returned (as an array) without extra columns.
        """
        print("interactinction transforming")
        # Positional slicing below needs an ndarray; this also accepts
        # DataFrame input and is a no-op for arrays.
        X = np.asarray(X)
        interaction_features = [
            X[:, num_col] * X[:, interaction_name]
            for num_col in self.numerical_columns
            for interaction_name in self.interaction_names
        ]
        if not interaction_features:
            # np.column_stack([]) raises, so short-circuit the empty case.
            return X
        return np.hstack([X, np.column_stack(interaction_features)])

    def get_feature_names_out(self, X=None, y=None):
        """Return the output column names: inputs followed by interactions.

        Parameters
        ----------
        X : array-like of str, optional
            Names of the input columns. The parameter keeps its original name
            for backward compatibility; it holds names, not data. When
            omitted, a ``feature_names`` attribute set on the instance is
            used if present.
        y : ignored

        Returns
        -------
        numpy.ndarray of object
            Input names followed by one ``"<a>*<b>"`` name per product
            column, in the same order ``transform`` appends them.

        Raises
        ------
        ValueError
            If no input names are supplied and none are stored on the instance.
        """
        names = X if X is not None else getattr(self, "feature_names", None)
        if names is None:
            raise ValueError(
                "Input feature names are required: pass them as the first "
                "argument or set the `feature_names` attribute."
            )
        names = np.asarray(names, dtype=object)
        products = [
            f"{names[num_col]}*{names[interaction_name]}"
            for num_col in self.numerical_columns
            for interaction_name in self.interaction_names
        ]
        return np.concatenate([names, np.asarray(products, dtype=object)])


# Pipeline
# Column layout produced by the preprocessor (one-hot block first, then the
# scaled numeric block; the first category of each encoded column is dropped):
#   0 cat__month_January   1 cat__month_March
#   2 cat__campaign_type_B 3 cat__campaign_type_C
#   4 num__cost
# 'February' and campaign 'A' are the dropped baselines, so there is no
# campaign_type_A column to interact with.
pipeline = Pipeline([
    ('preprocessor', preprocessor),  # Extract feature names dynamically
    ('interaction', InteractionFeatures(
        numerical_columns=[4],    # Index of 'num__cost' in the preprocessed output
        interaction_names=[0, 2]  # Indices of 'cat__month_January' and 'cat__campaign_type_B'
    )),
    ('model', LinearRegression())  # Example model
])

# Features and target
X = data[['month', 'campaign_type', 'cost']]
y = data['free_trials']

# Fit the pipeline
pipeline.fit(X, y)

# The interaction step addresses columns by position, so fail loudly if the
# preprocessor's output layout ever differs from the one assumed above.
fitted_names = list(preprocessor.feature_names)
assert [fitted_names[i] for i in (4, 0, 2)] == [
    'num__cost', 'cat__month_January', 'cat__campaign_type_B'
], fitted_names

# Make predictions
predictions = pipeline.predict(X)
print("Predictions:", predictions)


featurename fitting
featurename transforming
interactinction fitting
interactinction transforming
featurename transforming
interactinction transforming
Predictions: [50. 30. 60. 25.]


In [9]:
# Hand the names captured by the preprocessor to the interaction step.
pipeline['interaction'].feature_names = preprocessor.feature_names


In [10]:
# Show the output feature names the wrapper stored while fitting; their order
# is the column order of the matrix passed on to the next pipeline step.
preprocessor.feature_names

array(['cat__month_January', 'cat__month_March', 'cat__campaign_type_B',
       'cat__campaign_type_C', 'num__cost'], dtype=object)