In [None]:
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.linear_model import LinearRegression

# Sample dataset, written row-wise: each tuple is one campaign run in the
# order (month, campaign_type, cost, free_trials).
data = pd.DataFrame(
    [
        ('January', 'A', 500, 50),
        ('February', 'B', 300, 30),
        ('January', 'A', 700, 60),
        ('March', 'C', 200, 25),
    ],
    columns=['month', 'campaign_type', 'cost', 'free_trials'],
)

# Feature groups: numerical columns get scaled, categorical columns get
# one-hot encoded by the preprocessing defined later in the file.
numerical_columns = ['cost']
categorical_columns = ['month', 'campaign_type']

# Custom transformer for interaction terms
class InteractionFeatures(BaseEstimator, TransformerMixin):
    """Append numerical x indicator interaction terms to a DataFrame.

    For every pair ``(num_col, int_col)`` taken from ``numerical_columns`` and
    ``interaction_columns``, the element-wise product
    ``X[num_col] * X[int_col]`` is appended to the right of the original
    columns. Products are ordered with the numerical column as the outer loop
    and the interaction column as the inner loop.

    Parameters
    ----------
    numerical_columns : list of str
        Names of the columns in the input DataFrame to multiply.
    interaction_columns : list of str
        Names of the columns (e.g. one-hot indicators) to multiply them with.
    """

    def __init__(self, numerical_columns, interaction_columns):
        self.numerical_columns = numerical_columns  # Numerical columns to interact
        self.interaction_columns = interaction_columns  # One-hot encoded columns

    def fit(self, X, y=None):
        """Return ``self`` unchanged; the transformer learns nothing from data."""
        return self  # No fitting necessary

    def transform(self, X):
        """Return ``X`` as an array with the interaction columns appended.

        Parameters
        ----------
        X : pandas.DataFrame
            Must contain every name listed in ``numerical_columns`` and
            ``interaction_columns``.

        Returns
        -------
        numpy.ndarray
            ``X.to_numpy()`` followed by one column per requested product.
            When no products are requested the original values are returned
            without extra columns.

        Raises
        ------
        TypeError
            If ``X`` is not a DataFrame, so columns cannot be looked up by name.
        KeyError
            If any requested column is absent from ``X``.
        """
        # Columns are selected by label, which only a DataFrame supports. An
        # upstream transformer that emits ndarray/sparse output would otherwise
        # fail here with an unhelpful indexing error.
        if not isinstance(X, pd.DataFrame):
            raise TypeError(
                "InteractionFeatures selects columns by name and needs a pandas "
                f"DataFrame, got {type(X).__name__}. Configure the preceding "
                "step with set_output(transform='pandas')."
            )

        requested = [*self.numerical_columns, *self.interaction_columns]
        missing = [col for col in requested if col not in X.columns]
        if missing:
            raise KeyError(
                f"Columns {missing} not found in input; "
                f"available columns: {list(X.columns)}"
            )

        base = X.to_numpy()
        # Multiply numerical columns with one-hot encoded columns
        products = [
            X[num_col].to_numpy() * X[int_col].to_numpy()
            for num_col in self.numerical_columns
            for int_col in self.interaction_columns
        ]
        if not products:
            # np.column_stack rejects an empty sequence; nothing to append.
            return base

        # Combine the original and interaction features
        return np.hstack([base, np.column_stack(products)])

# Define preprocessors
# The preprocessor is configured to return a pandas DataFrame because the
# following InteractionFeatures step looks columns up by name. By default a
# ColumnTransformer returns a NumPy array or sparse matrix, which has no
# column labels.
preprocessor = ColumnTransformer(
    transformers=[
        # One-hot encode categories.
        # - The dropped (reference) category is named explicitly, one entry per
        #   column in categorical_columns. drop='first' would remove the
        #   alphabetically first category of each column ('February' and 'A'),
        #   so the 'cat__campaign_type_A' indicator used below would not exist.
        # - sparse_output=False is required for pandas output.
        ('cat', OneHotEncoder(drop=['February', 'B'], sparse_output=False), categorical_columns),
        ('num', StandardScaler(), numerical_columns)  # Scale numerical columns
    ],
    remainder='passthrough'  # Keep other columns as-is
).set_output(transform='pandas')

# Define pipeline
# Output columns of the preprocessor are prefixed with the transformer name
# ('cat__', 'num__'), so the scaled cost column is 'num__cost', not 'cost'.
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('interaction', InteractionFeatures(
        numerical_columns=['num__cost'],
        interaction_columns=['cat__month_January', 'cat__campaign_type_A']  # Specify interaction columns
    )),
    ('model', LinearRegression())  # Example model
])

# Split features and target
X = data[['month', 'campaign_type', 'cost']]
y = data['free_trials']

# Fit the pipeline
pipeline.fit(X, y)

# Predict
predictions = pipeline.predict(X)
