In [1]:
import numpy as np, humanfriendly as hf
import time
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ARDRegression
from sklearn.model_selection import GridSearchCV,cross_val_score
from sklearn.metrics import mean_squared_error


In [2]:
def get_error(model, Xtest, ytest):
    y_pred = model.predict(Xtest)
    return np.sqrt(mean_squared_error(ytest, y_pred)),model.__class__.__name__

In [3]:
def see_time(note,start):
    end = time.perf_counter()
    elapsed = end - start
    print (note,hf.format_timespan(elapsed, detailed=True))

In [4]:
def get_cross(model, data, target, groups=10):
    return cross_val_score(model, data, target, cv=groups,scoring='neg_mean_squared_error')

In [5]:
X = np.load('data/X_tips.npy')
y = np.load('data/y_tips.npy')
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [6]:
scaler = StandardScaler()
X_train_std = scaler.fit_transform(X_train)
X_test_std = scaler.fit_transform(X_test)
ard_model = ARDRegression().fit(X_train_std, y_train)
rmse, ard_model_name = get_error(ard_model, X_test_std, y_test)
print (ard_model_name + '(rmse):',rmse)

ARDRegression(rmse): 0.8745960871429688


In [8]:
iters = [50]
a1 = [1e5, 1e4]
a2 = [1e5, 1e4]
params = {'n_iter': iters, 'alpha_1': a1, 'alpha_2': a2}
grid = GridSearchCV(ard_model, params, cv=5, n_jobs=-1, verbose=1)
start = time.perf_counter()
grid.fit(X_train, y_train)
see_time('training time:',start)
bp = grid.best_params_
print (bp)

Fitting 5 folds for each of 4 candidates, totalling 20 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


training time: 2 seconds, 206 milliseconds, 192 microseconds and 400 nanoseconds
{'alpha_1': 10000.0, 'alpha_2': 100000.0, 'n_iter': 50}


[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    2.0s finished


In [10]:
ard_bpmodel = ARDRegression(**bp).fit(X_train_std, y_train)
rmse, ard_bpmodel_name = get_error(ard_bpmodel, X_test_std, y_test)
print (ard_bpmodel_name + '(rmse):',rmse)

ARDRegression(rmse): 0.8645625277607759


In [12]:
start = time.perf_counter()
scores = get_cross(ard_bpmodel, X, y)
see_time('cross-validation rmse:',start)
rmse = np.sqrt(np.mean(scores) * -1)
print (rmse)

cross-validation rmse: 1 second, 758 milliseconds, 122 microseconds and 100 nanoseconds
1.0376527153702295
