# Implementation: Feature Engineering

We will explore generating new features to help linear models solve non-linear problems.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import PolynomialFeatures, FunctionTransformer

# Apply seaborn's default theme once, up front, so the plots below share one style.
sns.set_theme()

## 1. Polynomial Features
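Imagine data that follows a curve ($y = x^2$). A straight line in $x$ cannot fit it, but if we add $x^2$ as an extra feature the relationship becomes linear in the expanded features, so an ordinary linear model can learn it.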

In [None]:
# Column vector holding the integers 0..4: one sample per row, a single feature.
X = np.arange(5)[:, np.newaxis]
print("Original X:\n", X)

# Expand the single feature x into the columns [x, x^2, x^3].
# include_bias=False leaves out the constant column of ones.
poly = PolynomialFeatures(include_bias=False, degree=3)
X_poly = poly.fit(X).transform(X)

print("\nPolynomial X (x, x^2, x^3):\n", X_poly)

## 2. Log Transformation
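Useful for right-skewed data: taking the logarithm compresses large values, which pulls in the long right tail and makes the distribution more symmetric.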

In [None]:
# Generate right-skewed data (exponential). A seeded generator keeps the
# sample, and therefore both histograms, identical from run to run.
rng = np.random.default_rng(42)
data = rng.exponential(scale=2, size=1000)

# log1p(x) = log(1 + x) stays finite at x = 0, where a plain log would give -inf.
data_log = np.log1p(data)

# Draw the raw and the transformed distributions side by side on explicit
# Axes objects rather than relying on pyplot's implicit "current axes".
fig, (ax_raw, ax_log) = plt.subplots(1, 2, figsize=(12, 5))

sns.histplot(data, kde=True, ax=ax_raw)
ax_raw.set_title('Original Skewed Data')

sns.histplot(data_log, kde=True, ax=ax_log)
ax_log.set_title('Log Transformed (More Normal)')

# Keep titles and axis labels of the two panels from overlapping.
fig.tight_layout()
plt.show()

## 3. Binning
Grouping continuous values into a small number of discrete, labelled intervals (bins).

In [None]:
df = pd.DataFrame({'Age': [5, 25, 45, 65, 85]})

# Bin edges for the groups 0-18, 18-35, 35-60 and 60+.
# pd.cut builds right-closed intervals, so a boundary age such as 18 falls in
# the lower group ('Child'). The last edge is infinity so that "60+" is truly
# open-ended: with a finite upper edge, any larger age would become NaN.
age_edges = [0, 18, 35, 60, float('inf')]
age_labels = ['Child', 'Young Adult', 'Adult', 'Senior']

# include_lowest=True closes the first interval on the left, so an age of
# exactly 0 is labelled 'Child' instead of being left unbinned (NaN).
df['Age_Group'] = pd.cut(
    df['Age'],
    bins=age_edges,
    labels=age_labels,
    include_lowest=True,
)

print("Binned Age:")
# NOTE: `display` is not imported here; it is presumably supplied by the
# IPython/Jupyter runtime this notebook runs in.
display(df)