In [4]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.compose import ColumnTransformer, make_column_selector

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.preprocessing import FunctionTransformer

from sklearn.linear_model import LinearRegression  # example model

# Split Your Data

In [None]:
# NOTE(review): `df` is not created in any cell shown here — it must already
# be loaded (with a "target_col" column) before this cell runs.
y = df["target_col"]
X = df.drop(columns="target_col")

# Hold out 20% of the rows for final testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# (Optional) Custom Feature Engineering

Example: log transform using `np.log1p`, i.e. log(1 + x) (defined only for values greater than -1, so zeros are fine)

In [None]:
# Element-wise log(1 + x) transformer.
# - `np.log1p` is passed directly instead of being wrapped in a lambda: a
#   lambda cannot be pickled, which would prevent saving any pipeline that
#   contains this transformer (pickle / joblib).
# - `inverse_func=np.expm1` makes `inverse_transform` return values on the
#   original scale.
# - `feature_names_out="one-to-one"` keeps output names equal to input names.
log_transformer = FunctionTransformer(
    np.log1p,
    inverse_func=np.expm1,
    feature_names_out="one-to-one",
)


Example: simple ratio feature

In [None]:
def ratio(a, b):
    """Return the element-wise ratio ``a / b`` as a single-column 2-D array.

    Parameters
    ----------
    a, b : array-like
        Numerator and denominator values (NumPy arrays, pandas Series or
        plain sequences). They are combined positionally.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n, 1)`` holding ``a / b``.

    Notes
    -----
    Division by zero follows NumPy semantics (``inf`` / ``nan`` with a
    RuntimeWarning); no special handling is applied here.
    """
    # Coerce first: pandas Series and lists have no usable ``.reshape``, so the
    # division result must be a NumPy array before reshaping.
    numerator = np.asarray(a)
    denominator = np.asarray(b)
    return (numerator / denominator).reshape(-1, 1)

def _ratio_of_first_two_columns(X):
    """Divide the first column of ``X`` by its second column.

    ``X`` is converted to a NumPy array first so that positional indexing
    (``[:, 0]``) also works when a pandas DataFrame is passed in.
    Returns a single-column 2-D array.
    """
    values = np.asarray(X)
    return ratio(values[:, 0], values[:, 1])


def _ratio_feature_names(transformer, input_features):
    """Name the single output column produced by the ratio transformer."""
    return ["ratio_feature"]


# Named module-level functions are used instead of lambdas so the transformer
# (and any pipeline containing it) can be pickled.
ratio_transform = FunctionTransformer(
    _ratio_of_first_two_columns,
    feature_names_out=_ratio_feature_names,
)

# Build Numerical & Categorical Pipelines

Numerical Pipeline

In [None]:
# Numeric branch: fill missing values with each column's median, then
# standardize. Step names match the ones make_pipeline would generate
# (lower-cased class names).
num_pipeline = Pipeline(
    steps=[
        ("simpleimputer", SimpleImputer(strategy="median")),
        ("standardscaler", StandardScaler()),
    ]
)

Categorical Pipeline

In [None]:
# Categorical branch: fill missing values with the most frequent category,
# then one-hot encode. handle_unknown="ignore" makes categories unseen during
# fit encode as all zeros instead of raising at transform time.
# Step names match the ones make_pipeline would generate.
cat_pipeline = Pipeline(
    steps=[
        ("simpleimputer", SimpleImputer(strategy="most_frequent")),
        ("onehotencoder", OneHotEncoder(handle_unknown="ignore")),
    ]
)

# Combine Everything with ColumnTransformer

In [None]:
# Route columns to a branch by dtype: numeric columns go through
# num_pipeline, object-dtype columns through cat_pipeline.
# Columns matching neither selector are dropped, because ColumnTransformer's
# default is remainder="drop".
preprocessing = ColumnTransformer(
    transformers=[
        ("num", num_pipeline, make_column_selector(dtype_include=np.number)),
        ("cat", cat_pipeline, make_column_selector(dtype_include=object)),
    ],
)

# Final Model Pipeline

In [None]:
# Final estimator; swap in any other scikit-learn compatible model here.
model = LinearRegression()

# make_pipeline derives step names from the class names, so the pipeline
# stays correctly labelled if `model` is replaced.
full_pipeline = make_pipeline(preprocessing, model)

# Train, Evaluate and Predict

In [None]:
# Fit preprocessing + model on the training split only.
full_pipeline.fit(X_train, y_train)

# 5-fold cross-validation on the training split. With no `scoring` argument
# the estimator's own `score` method is used (R^2 for LinearRegression).
scores = cross_val_score(full_pipeline, X=X_train, y=y_train, cv=5)
print("CV Score:", np.mean(scores))

# Predictions for the held-out test split.
preds = full_pipeline.predict(X_test)