In [None]:
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Build a synthetic regression data set (fixed seed for reproducibility).
X, y = make_regression(n_samples=1000, n_features=5,
                       noise=0.1, random_state=42)

# Hold out 20% of the samples; they are only used for the final evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Hyper-parameters that apply to every candidate regardless of kernel.
# 'scaling' swaps the whole pipeline step: StandardScaler() or None (step
# disabled), so the search also decides whether scaling helps at all.
common_grid = {
    'scaling': [StandardScaler(), None],
    'feature_selection__k': [2, 3, 'all'],
    'svr__C': [0.1, 1, 10, 100],
}

# A list of grids is searched as the union of the individual grids.
# `gamma` is only swept for the RBF kernel: the linear kernel does not use it,
# so crossing it with kernel='linear' would refit identical models five times
# (240 candidates instead of the 144 distinct ones below).
param_grid = [
    {**common_grid, 'svr__kernel': ['linear']},
    {
        **common_grid,
        'svr__kernel': ['rbf'],
        'svr__gamma': ['scale', 'auto', 0.1, 1, 10],
    },
]

pipeline = Pipeline([
    ('scaling', StandardScaler()),  # Standardize the features
    ('feature_selection',
     SelectKBest(score_func=f_regression, k='all')),  # Keep the top-k features
    ('svr', SVR()),  # Regression model
])

# 5-fold cross-validated search; the scorer is negated MSE, so higher is better.
grid_search = GridSearchCV(pipeline, param_grid, cv=5,
                           scoring='neg_mean_squared_error', verbose=1)
grid_search.fit(X_train, y_train)

print("Best parameters found:", grid_search.best_params_)
# With the default refit=True, best_estimator_ is the best pipeline refitted
# on the full training split.
best_model = grid_search.best_estimator_

# Evaluate once on the held-out test split.
y_pred = best_model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error on Test Set:", mse)

Fitting 5 folds for each of 50 candidates, totalling 250 fits
