In [1]:
import numpy as np, humanfriendly as hf, random
import time
from sklearn.model_selection import train_test_split,RandomizedSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [2]:
def get_scores(model, xtrain, ytrain, xtest, ytest):
    ypred = model.predict(xtest)
    train = model.score(xtrain, ytrain)
    test = model.score(xtest, y_test)
    name = model.__class__.__name__
    return (name, train, test)

In [3]:
def get_cross(model, data, target, groups=10):
    return cross_val_score(model, data, target, cv=groups)

In [4]:
def prep_data(data, target):
    d = [data[i] for i, _ in enumerate(data)]
    t = [target[i] for i, _ in enumerate(target)]
    return list(zip(d, t))


In [5]:
def create_sample(d, n, replace='yes'):
    if replace == 'yes': s = random.sample(d, n)
    else: s = [random.choice(d)
               for i, _ in enumerate(d) if i < n]
    Xs = [row[0] for i, row in enumerate(s)]
    ys = [row[1] for i, row in enumerate(s)]
    return np.array(Xs), np.array(ys)

In [6]:
def see_time(note,start):
    end = time.perf_counter()
    elapsed = end - start
    print (note,hf.format_timespan(elapsed, detailed=True))

In [8]:
X = np.load('data/X_bank.npy')
# need to add allow_pickle=True parameter
y = np.load('data/y_bank.npy', allow_pickle=True)

In [9]:
sample_size = 4000
data = prep_data(X, y)
Xs, ys = create_sample(data, sample_size, replace='no')
Xs = StandardScaler().fit_transform(Xs)
X_train, X_test, y_train, y_test = train_test_split(Xs, ys, random_state=0)

In [10]:
svm = SVC(gamma='scale', random_state=0)
print (svm.__class__.__name__)
svm.fit(X_train, y_train)
svm_scores = get_scores(svm, X_train, y_train,X_test, y_test)
print (svm_scores[0] + ' (train, test):',svm_scores[1],svm_scores[2])

SVC
SVC (train, test): 0.9416666666666667 0.915


In [11]:
Cs = [0.0001, 0.001]
param_grid = {'C': Cs}

In [12]:
start = time.perf_counter()
rand = RandomizedSearchCV(svm, param_grid, cv=3, n_jobs = -1,random_state=0, verbose=2,n_iter=2)
rand.fit(X, y)
see_time('RandomizedSearchCV total tuning time:')
bp = rand.best_params_
print (bp)

Fitting 3 folds for each of 2 candidates, totalling 6 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   6 | elapsed:   41.6s remaining:   41.6s
[Parallel(n_jobs=-1)]: Done   6 out of   6 | elapsed:  1.1min finished


TypeError: see_time() missing 1 required positional argument: 'start'

In [None]:
svm_tuned = SVC(**bp, gamma='scale', random_state=0)
svm_tuned.fit(X_train, y_train)
svm_scores = get_scores(svm_tuned, X_train, y_train,X_test, y_test)
print (svm_scores[0] + ' (train, test):',svm_scores[1],svm_scores[2])

In [None]:
print ('cross-validation score:')
svm = SVC(gamma='scale')
scores = get_cross(svm, Xs, ys)
print (np.mean(scores))