In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# --- Configuration: every tunable number used by this cell lives here ---
SEED = 42          # seeds both NumPy's global RNG and the train/test split
N_SAMPLES = 100    # number of synthetic rows
NOISE_STD = 0.1    # standard deviation of the Gaussian noise added to the target
TEST_SIZE = 0.2    # fraction of rows held out for evaluation


def fit_and_report(title, features_train, features_test, target_train, target_test):
    """Fit a LinearRegression on the training data and print test-set metrics.

    Parameters
    ----------
    title : str
        Heading printed verbatim before the metrics.
    features_train, features_test :
        Feature matrices handed to ``fit`` and ``predict`` respectively.
    target_train, target_test :
        Targets used for fitting and for scoring the predictions.

    Returns
    -------
    tuple
        ``(model, predictions)``: the fitted estimator and its predictions
        for ``features_test``.
    """
    model = LinearRegression()
    model.fit(features_train, target_train)
    predictions = model.predict(features_test)

    print(title)
    print(f"R² Score: {r2_score(target_test, predictions):.4f}")
    # RMSE is the square root of the mean squared error.
    print(f"RMSE: {np.sqrt(mean_squared_error(target_test, predictions)):.4f}")
    return model, predictions


# Sample dataset: two features drawn with np.random.rand.
# The draw order (X1, then X2, then the noise) is kept fixed so the seeded
# results stay reproducible.
np.random.seed(SEED)
X = pd.DataFrame({
    'X1': np.random.rand(N_SAMPLES),
    'X2': np.random.rand(N_SAMPLES)
})
# Target variable: additive in X1 and X2 plus an X1*X2 interaction term and
# Gaussian noise. The interaction term is the nonlinear part.
y = 3*X['X1'] + 2*X['X2'] + 4*X['X1']*X['X2'] + np.random.normal(0, NOISE_STD, N_SAMPLES)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED
)

# Baseline: model with only the original features
model1, y_pred1 = fit_and_report(
    "Model with original features:", X_train, X_test, y_train, y_test
)

# Feature engineering: degree-2 polynomial and interaction terms.
# The transformer is fitted on the training split and then reused on the test split.
poly = PolynomialFeatures(degree=2, interaction_only=False, include_bias=False)
X_poly_train = poly.fit_transform(X_train)
X_poly_test = poly.transform(X_test)

# Model with engineered features (the leading "\n" separates the two reports)
model2, y_pred2 = fit_and_report(
    "\nModel with polynomial + interaction features:",
    X_poly_train, X_poly_test, y_train, y_test,
)

# Show feature names
feature_names = poly.get_feature_names_out(X.columns)
print("\nEngineered Features:")
print(feature_names)


Model with original features:
R² Score: 0.9698
RMSE: 0.3425

Model with polynomial + interaction features:
R² Score: 0.9981
RMSE: 0.0855

Engineered Features:
['X1' 'X2' 'X1^2' 'X1 X2' 'X2^2']
