In [1]:
import numpy as np
from sklearn.datasets import make_regression
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.preprocessing import PolynomialFeatures
# Synthetic regression problem: 100 samples, 5 features, noise level 0.1, fixed seed.
X, y = make_regression(
    n_samples=100,
    n_features=5,
    noise=0.1,
    random_state=42,
)

# Univariate selection: keep the 3 features ranked highest by f_regression.
selector = SelectKBest(f_regression, k=3)
X_selected = selector.fit(X, y).transform(X)

# Degree-2 polynomial expansion of the original features, without the bias column.
poly = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly.fit(X).transform(X)

# Compare the widths of the three feature matrices.
print("Original feature matrix shape:", X.shape)
print("Selected feature matrix shape:", X_selected.shape)
print("Polynomial feature matrix shape:", X_poly.shape)

Original feature matrix shape: (100, 5)
Selected feature matrix shape: (100, 3)
Polynomial feature matrix shape: (100, 20)


In [2]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, AdaBoostRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error

# One seed shared by the split and by every estimator that accepts one, so a
# fresh-kernel re-run reproduces the same numbers.
RANDOM_STATE = 42

# Hold out 20% of the samples for the final error measurement.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Candidate regressors, keyed by the label used when reporting results.
# The three ensemble models are randomized during fitting, so they are seeded
# explicitly; left unseeded, their MSE changes from run to run.
algorithms = {
    'Ridge': Ridge(),
    'RandomForest': RandomForestRegressor(random_state=RANDOM_STATE),
    'ExtraTrees': ExtraTreesRegressor(random_state=RANDOM_STATE),
    'AdaBoost': AdaBoostRegressor(random_state=RANDOM_STATE),
    'SVM': SVR()
}

# Hyperparameter grids, keyed by the same labels as `algorithms`.
# These value lists are illustrative examples, not tuned search ranges.
param_grids = {
    'Ridge': {'alpha': [0.1, 1.0, 10.0]},
    'RandomForest': {'n_estimators': [50, 100, 200]},
    'ExtraTrees': {'n_estimators': [50, 100, 200]},
    'AdaBoost': {'n_estimators': [50, 100, 200]},
    'SVM': {'C': [0.1, 1.0, 10.0], 'kernel': ['linear', 'rbf']}
}

# For each candidate: run a 3-fold grid search on the training split (scored by
# negative MSE), then measure the refit best model's MSE on the held-out split.
for name in algorithms:
    estimator = algorithms[name]
    grid_search = GridSearchCV(
        estimator=estimator,
        param_grid=param_grids[name],
        scoring='neg_mean_squared_error',
        cv=3,
    ).fit(X_train, y_train)

    best_estimator = grid_search.best_estimator_
    y_pred = best_estimator.predict(X_test)
    mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
    print(f"{name} - Best Parameters: {grid_search.best_params_}, MSE: {mse}")

Ridge - Best Parameters: {'alpha': 0.1}, MSE: 0.05733078713593104
RandomForest - Best Parameters: {'n_estimators': 200}, MSE: 4042.3406315870634
ExtraTrees - Best Parameters: {'n_estimators': 200}, MSE: 3305.801135082347
AdaBoost - Best Parameters: {'n_estimators': 200}, MSE: 4481.765814302971
SVM - Best Parameters: {'C': 10.0, 'kernel': 'linear'}, MSE: 0.01492321732702547
