In [1]:
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.pipeline import Pipeline
from scipy.stats import uniform
from sklearn.preprocessing import StandardScaler

# Synthetic regression problem: 1000 samples, 5 features, noise=0.1.
# The fixed random_state makes the generated data reproducible.
X, y = make_regression(
    n_samples=1000,
    n_features=5,
    noise=0.1,
    random_state=42,
)

# Hold out 20% of the rows as a test set; only the remaining 80% is passed
# to the hyper-parameter search.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Search space for the randomized search. Keys follow the
# "<step name>__<parameter>" convention, so each one targets the 'svr' step
# of the pipeline built below.
# NOTE(review): SVR uses gamma only for its non-linear kernels, so for
# candidates that draw kernel='linear' the sampled gamma has no effect
# (it still shows up in best_params_).
param_distributions = {
    # Kernel is picked from this list.
    'svr__kernel': ['linear', 'rbf'],
    # scipy's uniform(loc, scale) spans [loc, loc + scale], i.e. [0.1, 100.1].
    'svr__C': uniform(loc=0.1, scale=100),
    # Mix of sklearn's named heuristics and explicit numeric values.
    'svr__gamma': ['scale', 'auto', 0.1, 1, 10]
}

# Preprocessing and model chained into one estimator, so every step is
# re-fitted on the training portion of each CV fold.
pipeline = Pipeline(
    steps=[
        # Standardize the data.
        ('scaling', StandardScaler()),
        # k='all' keeps every feature, so this step currently passes all
        # columns through.
        ('feature_selection', SelectKBest(score_func=f_regression, k='all')),
        # Support vector regressor; its hyper-parameters are set by the search.
        ('svr', SVR()),
    ]
)

# Randomized hyper-parameter search: 50 sampled candidates, each scored with
# 5-fold cross-validation (250 fits in total). The score is negated MSE, so
# the candidate with the highest score has the lowest cross-validated error.
random_search = RandomizedSearchCV(
    estimator=pipeline,
    param_distributions=param_distributions,
    n_iter=50,
    cv=5,
    scoring='neg_mean_squared_error',
    verbose=1,
    random_state=42,
)
random_search.fit(X_train, y_train)

# Pipeline configured with the winning parameters (refit on the full
# training set under RandomizedSearchCV's default refit=True).
best_model = random_search.best_estimator_
print("Best parameters found:", random_search.best_params_)

# Final check on the held-out test split, which the search never saw.
y_pred = best_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error on Test Set:", mse)

Fitting 5 folds for each of 50 candidates, totalling 250 fits
Best parameters found: {'svr__C': 21.333911067827618, 'svr__gamma': 1, 'svr__kernel': 'linear'}
Mean Squared Error on Test Set: 0.011195893897019714
