In [1]:
from hyperopt import tpe, hp, fmin, Trials, STATUS_OK

In [2]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split, cross_val_score
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

In [3]:
from sklearn.datasets import load_boston

In [4]:
data = load_boston()

In [5]:
X, y = data.data, data.target

In [6]:
X.shape

(506, 13)

In [7]:
# IterativeImputer (MICE: Multiple Imputation with Chained Equation)
# KNNImputer

In [8]:
# from sklearn.experimental import enable_iterative_imputer

In [9]:
# from sklearn.impute import IterativeImputer

In [10]:
scaler = StandardScaler()

In [11]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=123)

In [12]:
# import time


In [14]:
svr_params = {
    'C':hp.uniform('C',1.0,50.0),
    'gamma':hp.uniform('gamma',0.001,0.1),
    'epsilon':hp.uniform('epsilon',0.01,0.5)
}

In [15]:
def hyperopt_obj_function(params):
    svr=SVR(**params)
#     print(svr)
    out = cross_val_score(svr, x_train,y_train, cv=5, scoring='neg_mean_squared_error',n_jobs=-1).mean()
#     print(out)
    return out

In [16]:
def surrogate_function(params):
    perf = hyperopt_obj_function(params)
    return {'loss':-perf, 'status':STATUS_OK}

In [27]:
trials = Trials()
best = fmin(surrogate_function, space=svr_params, algo=tpe.suggest, max_evals=400, trials=trials)
print('best:')
print (best)

100%|██████████| 400/400 [00:12<00:00, 31.13trial/s, best loss: 29.718639986439893]
best:
{'C': 48.624581413385314, 'epsilon': 0.3941800285718001, 'gamma': 0.0010244445209546657}


In [28]:
svr_best = SVR(**best)

In [29]:
svr_best.fit(x_train, y_train)

SVR(C=48.624581413385314, epsilon=0.3941800285718001,
    gamma=0.0010244445209546657)

In [30]:
pred_svr = svr_best.predict(x_test)

In [31]:
mean_squared_error(y_test, pred_svr)

31.02962581700846

In [32]:
svr_new = SVR()
svr_param_grid = {'C':range(1,51), 
                  'epsilon':np.linspace(0.001,0.2,num=10), 
                  'gamma':np.linspace(0.001,0.01,num=10)}

In [33]:
best_svr = GridSearchCV(estimator=svr_new, param_grid=svr_param_grid, 
                        scoring='neg_mean_squared_error', cv=5, n_jobs=-1,verbose=1)

In [34]:
best_svr.fit(x_train, y_train)

Fitting 5 folds for each of 5000 candidates, totalling 25000 fits


GridSearchCV(cv=5, estimator=SVR(), n_jobs=-1,
             param_grid={'C': range(1, 51),
                         'epsilon': array([0.001     , 0.02311111, 0.04522222, 0.06733333, 0.08944444,
       0.11155556, 0.13366667, 0.15577778, 0.17788889, 0.2       ]),
                         'gamma': array([0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009,
       0.01 ])},
             scoring='neg_mean_squared_error', verbose=1)

In [35]:
pred_best_svr = best_svr.best_estimator_.predict(x_test)

In [36]:
mean_squared_error(y_test, pred_best_svr)

31.381617579597226

In [37]:
svr_random = RandomizedSearchCV(estimator=svr_new, param_distributions=svr_param_grid, n_iter=50, cv=5,
                                scoring='neg_mean_squared_error', n_jobs=-1, verbose=1)

In [38]:
svr_random.fit(x_train, y_train)

Fitting 5 folds for each of 50 candidates, totalling 250 fits


RandomizedSearchCV(cv=5, estimator=SVR(), n_iter=50, n_jobs=-1,
                   param_distributions={'C': range(1, 51),
                                        'epsilon': array([0.001     , 0.02311111, 0.04522222, 0.06733333, 0.08944444,
       0.11155556, 0.13366667, 0.15577778, 0.17788889, 0.2       ]),
                                        'gamma': array([0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009,
       0.01 ])},
                   scoring='neg_mean_squared_error', verbose=1)

In [39]:
pred_best_svr_rand = svr_random.best_estimator_.predict(x_test)

In [40]:
mean_squared_error(y_test, pred_best_svr_rand)

31.57769619606882

In [41]:
svr_random.best_params_

{'gamma': 0.001, 'epsilon': 0.1778888888888889, 'C': 44}