In [11]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle

from sklearn.preprocessing import normalize
from sklearn.model_selection import KFold
from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer
from IPython.display import display

# Load the pickled dataset into `data`.
# NOTE(review): the next cell iterates `data` and reads data[key]['apl'] and
# data[key]['form_factor'], so this is presumably a dict of per-sample dicts — confirm.
# SECURITY: pickle.load can execute arbitrary code while deserialising; only
# open this file if it comes from a trusted source.
with open('datapoints_dict.pkl', 'rb') as f:
    data = pickle.load(f)

In [12]:
# Visit every record in `data`, skipping those whose target ('apl') is NaN,
# and split the survivors into a feature matrix and a target vector.
usable = [
    record
    for record in (data[name] for name in data)
    if not np.isnan(record['apl'])
]

# Features: one form-factor row per record, normalised row-wise (axis=1)
# with sklearn's 'max' norm.
formfactors = np.array(
    normalize([record['form_factor'] for record in usable], norm='max', axis=1)
)
# Targets: the matching 'apl' values, in the same order as the feature rows.
apls = np.array([record['apl'] for record in usable])

In [13]:
def shuffle_both(x, y, seed=None):
    """Shuffle ``x`` and ``y`` with one shared permutation so (x, y) pairs stay matched.

    Parameters
    ----------
    x, y : array-like
        Sequences of equal length. Plain Python lists are accepted; both are
        converted with ``np.asarray`` before indexing.
    seed : int or None, optional
        When given, the permutation is drawn from a dedicated generator seeded
        with this value, so the result is repeatable. When ``None`` (default)
        the global ``np.random`` state is used, exactly as before, so an
        earlier ``np.random.seed(...)`` call still controls the outcome.

    Returns
    -------
    tuple of np.ndarray
        ``(x_shuffled, y_shuffled)`` reordered along the first axis.

    Raises
    ------
    AssertionError
        If ``x`` and ``y`` differ in length.
    """
    # Fancy indexing with an index array only works on ndarrays, not on lists.
    x = np.asarray(x)
    y = np.asarray(y)
    assert len(x) == len(y), "x and y must have the same length"

    if seed is None:
        new_perm = np.random.permutation(len(x))
    else:
        new_perm = np.random.default_rng(seed).permutation(len(x))
    return x[new_perm], y[new_perm]

# Seed NumPy's global RNG before shuffling: shuffle_both draws its permutation
# from np.random, and every model search below is fitted on xs/ys, so an
# unseeded shuffle makes the whole notebook change from run to run.
np.random.seed(0)
# Randomise the sample order once; features (xs) and targets (ys) stay paired.
xs, ys = shuffle_both(formfactors, apls)

In [14]:
from sklearn.svm import SVR

# Bayesian hyper-parameter search for an RBF-kernel SVR on (xs, ys).
SVR_params = {
    'kernel': Categorical(['rbf']),
    # NOTE(review): sklearn documents `degree` as used only by the 'poly'
    # kernel, so with the kernel fixed to 'rbf' this dimension should not
    # affect the score. It is kept because the summary cell that follows
    # selects the `param_degree` column.
    'degree': Integer(1, 10),
    #'gamma': Real(1e-5, 1e-1, prior='log-uniform'),
    'tol': Real(1e-7, 1e-1, prior='log-uniform'),
    'C': Real(1.0, 1e+4, prior='uniform'),
    'epsilon': Real(1e-6, 1e-1, prior='log-uniform')
}

opt = BayesSearchCV(
    SVR(),
    SVR_params,
    # Fixed seeds make the fold indices and the optimiser's candidate sequence
    # repeatable. xs/ys are shuffled upstream, so scores are only fully
    # repeatable if that shuffle is deterministic as well.
    cv=KFold(n_splits=5, shuffle=True, random_state=0),
    random_state=0,
    verbose=3
)

opt.fit(xs, ys)

# One row per evaluated candidate, best rank first. Built in a single
# expression so re-running the cell never leaves a half-mutated frame behind.
df = pd.DataFrame(opt.cv_results_).sort_values(by='rank_test_score')

display(df)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END C=8262.816942131687, degree=2, epsilon=0.0002946422763054756, kernel=rbf, tol=0.0011112652527498217;, score=0.944 total time=   1.3s
[CV 2/5] END C=8262.816942131687, degree=2, epsilon=0.0002946422763054756, kernel=rbf, tol=0.0011112652527498217;, score=0.914 total time=   1.1s
[CV 3/5] END C=8262.816942131687, degree=2, epsilon=0.0002946422763054756, kernel=rbf, tol=0.0011112652527498217;, score=0.947 total time=   1.1s
[CV 4/5] END C=8262.816942131687, degree=2, epsilon=0.0002946422763054756, kernel=rbf, tol=0.0011112652527498217;, score=0.958 total time=   1.4s
[CV 5/5] END C=8262.816942131687, degree=2, epsilon=0.0002946422763054756, kernel=rbf, tol=0.0011112652527498217;, score=0.925 total time=   1.0s
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END C=958.9033555186573, degree=4, epsilon=0.003375849947806625, kernel=rbf, tol=1.2358933516078587e-07;, score=0.954 total time=   0.3s
[CV 2/

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_degree,param_epsilon,param_kernel,param_tol,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
40,0.41398,0.072597,0.045041,0.000797,1559.892425,10,0.042953,rbf,2e-06,"{'C': 1559.8924250713874, 'degree': 10, 'epsil...",0.941717,0.934233,0.953833,0.952025,0.944199,0.945201,0.007135,1
29,0.234784,0.014315,0.040681,0.001768,1678.424004,9,5e-06,rbf,0.004441,"{'C': 1678.424003909024, 'degree': 9, 'epsilon...",0.923027,0.945777,0.948047,0.956958,0.951179,0.944997,0.011609,2
15,1.306071,0.148732,0.052363,0.005804,6385.126736,9,0.023984,rbf,1.3e-05,"{'C': 6385.126735519112, 'degree': 9, 'epsilon...",0.930652,0.955152,0.944519,0.922836,0.959981,0.942628,0.014119,3
5,0.850475,0.228783,0.043097,0.004692,2639.408119,9,4.4e-05,rbf,2e-06,"{'C': 2639.4081194308646, 'degree': 9, 'epsilo...",0.937096,0.936003,0.941072,0.943461,0.955253,0.942577,0.006885,4
12,1.630855,0.475166,0.044794,0.005822,4644.007677,8,0.004089,rbf,0.0,"{'C': 4644.007677056381, 'degree': 8, 'epsilon...",0.948046,0.958193,0.938767,0.95381,0.912522,0.942267,0.016232,5
25,0.439849,0.024027,0.043292,0.000764,8513.906557,10,0.022635,rbf,0.1,"{'C': 8513.906556572074, 'degree': 10, 'epsilo...",0.955907,0.947951,0.916675,0.956252,0.929603,0.941278,0.015649,6
8,2.413591,0.570139,0.040011,0.001804,7068.462035,7,2.2e-05,rbf,0.0,"{'C': 7068.462035100103, 'degree': 7, 'epsilon...",0.938336,0.957005,0.925621,0.930425,0.954541,0.941185,0.012608,7
45,0.767582,0.057957,0.045728,0.00176,9139.840924,1,1.3e-05,rbf,0.016819,"{'C': 9139.84092351972, 'degree': 1, 'epsilon'...",0.945239,0.957015,0.939459,0.956364,0.907332,0.941082,0.018146,8
46,0.481752,0.031318,0.042424,0.000609,7483.110749,1,0.059348,rbf,0.02527,"{'C': 7483.110748941026, 'degree': 1, 'epsilon...",0.945756,0.950418,0.930067,0.937582,0.940527,0.94087,0.006967,9
33,0.254744,0.013242,0.045546,0.001796,3167.345715,9,1e-06,rbf,0.1,"{'C': 3167.3457148407724, 'degree': 9, 'epsilo...",0.954835,0.922319,0.950506,0.951338,0.913759,0.938551,0.017028,10


In [15]:
# Compact view of the first ten rows of `df` (sorted by rank in the search
# cell above): searched parameters plus the cross-validated score columns.
display(
    df.head(10)[[
        'param_degree',
        'param_epsilon',
        'param_tol',
        'param_C',
        'mean_test_score',
        'std_test_score',
        'rank_test_score',
    ]]
)

Unnamed: 0,param_degree,param_epsilon,param_tol,param_C,mean_test_score,std_test_score,rank_test_score
40,10,0.042953,2e-06,1559.892425,0.945201,0.007135,1
29,9,5e-06,0.004441,1678.424004,0.944997,0.011609,2
15,9,0.023984,1.3e-05,6385.126736,0.942628,0.014119,3
5,9,4.4e-05,2e-06,2639.408119,0.942577,0.006885,4
12,8,0.004089,0.0,4644.007677,0.942267,0.016232,5
25,10,0.022635,0.1,8513.906557,0.941278,0.015649,6
8,7,2.2e-05,0.0,7068.462035,0.941185,0.012608,7
45,1,1.3e-05,0.016819,9139.840924,0.941082,0.018146,8
46,1,0.059348,0.02527,7483.110749,0.94087,0.006967,9
33,9,1e-06,0.1,3167.345715,0.938551,0.017028,10


In [16]:
from sklearn.svm import NuSVR

# Bayesian hyper-parameter search for NuSVR on (xs, ys).
NuSVR_params = {
    'nu': Real(1e-2, 1.0, prior='uniform'),
    #'kernel': Categorical(['rbf']),
    # NOTE(review): the kernel is left at NuSVR's default here; sklearn
    # documents that default as 'rbf' and `degree` as used only by 'poly',
    # so this dimension should not affect the score. It is kept because the
    # summary cell that follows selects the `param_degree` column.
    'degree': Integer(1, 10),
    #'gamma': Real(1e-6, 1e+1, prior='log-uniform'),
    'tol': Real(1e-8, 1e-1, prior='log-uniform'),
    'C': Real(1.0, 1e+4, prior='uniform')
}

opt = BayesSearchCV(
    NuSVR(),
    NuSVR_params,
    # Fixed seeds make the fold indices and the optimiser's candidate sequence
    # repeatable. xs/ys are shuffled upstream, so scores are only fully
    # repeatable if that shuffle is deterministic as well.
    cv=KFold(n_splits=5, shuffle=True, random_state=0),
    random_state=0,
    verbose=3
)

opt.fit(xs, ys)

# One row per evaluated candidate, best rank first. Built in a single
# expression so re-running the cell never leaves a half-mutated frame behind.
df = pd.DataFrame(opt.cv_results_).sort_values(by='rank_test_score')

display(df)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END C=8224.655646039495, degree=2, nu=0.6816404054205488, tol=3.651292967873384e-05;, score=0.925 total time=  14.2s
[CV 2/5] END C=8224.655646039495, degree=2, nu=0.6816404054205488, tol=3.651292967873384e-05;, score=0.946 total time=  16.5s
[CV 3/5] END C=8224.655646039495, degree=2, nu=0.6816404054205488, tol=3.651292967873384e-05;, score=0.889 total time=  21.3s
[CV 4/5] END C=8224.655646039495, degree=2, nu=0.6816404054205488, tol=3.651292967873384e-05;, score=0.922 total time=  18.1s
[CV 5/5] END C=8224.655646039495, degree=2, nu=0.6816404054205488, tol=3.651292967873384e-05;, score=0.937 total time=  27.2s
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END C=2050.173768708637, degree=3, nu=0.5976463871073646, tol=4.909970495844769e-06;, score=0.927 total time=   1.9s
[CV 2/5] END C=2050.173768708637, degree=3, nu=0.5976463871073646, tol=4.909970495844769e-06;, score=0.951 total time=   1.3s


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_degree,param_nu,param_tol,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
9,1.564189,0.200985,0.023144,0.002178,3026.610129,1,0.350302,1e-06,"{'C': 3026.6101292982576, 'degree': 1, 'nu': 0...",0.930306,0.949603,0.916133,0.9296,0.937788,0.932686,0.010967,1
34,3.369472,0.500597,0.023874,0.002459,4329.858565,1,0.379786,0.0,"{'C': 4329.858565063308, 'degree': 1, 'nu': 0....",0.932733,0.950847,0.909784,0.931489,0.936665,0.932304,0.013197,2
29,2.596256,0.240674,0.02478,0.001323,3709.826237,10,0.35661,0.0,"{'C': 3709.8262366830645, 'degree': 10, 'nu': ...",0.930611,0.949608,0.914253,0.930056,0.936582,0.932222,0.011414,3
25,0.346836,0.021869,0.025461,0.000681,3491.246316,1,0.373446,0.1,"{'C': 3491.246315803315, 'degree': 1, 'nu': 0....",0.930393,0.950171,0.915189,0.93024,0.935039,0.932206,0.011203,4
35,3.278656,0.836571,0.026066,0.000401,3867.04768,1,0.379195,0.0,"{'C': 3867.04767951592, 'degree': 1, 'nu': 0.3...",0.931279,0.951195,0.91167,0.930951,0.935691,0.932157,0.012624,5
49,1.220063,0.18549,0.028557,0.000813,1556.623778,4,0.439779,0.0,"{'C': 1556.6237775048917, 'degree': 4, 'nu': 0...",0.929039,0.947767,0.92237,0.929429,0.931574,0.932036,0.008448,6
46,0.418644,0.021779,0.026722,0.000787,4133.636411,10,0.379336,0.1,"{'C': 4133.63641125526, 'degree': 10, 'nu': 0....",0.932595,0.950708,0.910293,0.93096,0.935299,0.931971,0.012911,7
41,1.93284,0.276033,0.023382,0.001155,2485.008362,10,0.390478,0.0,"{'C': 2485.0083615396557, 'degree': 10, 'nu': ...",0.927241,0.948816,0.917211,0.930705,0.933842,0.931563,0.010279,8
40,0.244517,0.021581,0.020848,0.000278,2115.68308,10,0.365269,0.040455,"{'C': 2115.6830796024315, 'degree': 10, 'nu': ...",0.927541,0.948323,0.916268,0.930456,0.935021,0.931522,0.010433,9
30,2.369591,0.253128,0.019503,0.000281,3382.056922,10,0.316544,0.0,"{'C': 3382.05692232899, 'degree': 10, 'nu': 0....",0.928267,0.948571,0.911974,0.93003,0.937468,0.931262,0.012005,10


In [17]:
# Compact view of the first ten rows of `df` (sorted by rank in the search
# cell above): searched parameters plus the cross-validated score columns.
display(
    df.head(10)[[
        'param_nu',
        'param_degree',
        'param_tol',
        'param_C',
        'mean_test_score',
        'std_test_score',
        'rank_test_score',
    ]]
)

Unnamed: 0,param_nu,param_degree,param_tol,param_C,mean_test_score,std_test_score,rank_test_score
9,0.350302,1,1e-06,3026.610129,0.932686,0.010967,1
34,0.379786,1,0.0,4329.858565,0.932304,0.013197,2
29,0.35661,10,0.0,3709.826237,0.932222,0.011414,3
25,0.373446,1,0.1,3491.246316,0.932206,0.011203,4
35,0.379195,1,0.0,3867.04768,0.932157,0.012624,5
49,0.439779,4,0.0,1556.623778,0.932036,0.008448,6
46,0.379336,10,0.1,4133.636411,0.931971,0.012911,7
41,0.390478,10,0.0,2485.008362,0.931563,0.010279,8
40,0.365269,10,0.040455,2115.68308,0.931522,0.010433,9
30,0.316544,10,0.0,3382.056922,0.931262,0.012005,10


In [19]:
import lightgbm as lgbm

# Bayesian hyper-parameter search for a LightGBM regressor on (xs, ys).
LGBM_params = {
    #'boosting_type': Categorical(['gbdt']),
    'num_leaves': Integer(10, 300),
    #'max_depth': Integer(), # or just -1
    'learning_rate': Real(1e-2, 0.5, prior='log-uniform'),
    'n_estimators': Integer(10, 300),
    'min_child_samples': Integer(1, 50),
    # NOTE(review): per the LightGBM parameter docs, row subsampling is only
    # active when subsample_freq > 0, so candidates drawn with
    # subsample_freq=0 presumably ignore `subsample` — confirm.
    'subsample': Real(1e-2, 1.0, prior='uniform'),
    'subsample_freq': Integer(0, 50)
}

opt = BayesSearchCV(
    lgbm.LGBMRegressor(),
    LGBM_params,
    # Fixed seeds make the fold indices and the optimiser's candidate sequence
    # repeatable. xs/ys are shuffled upstream, so scores are only fully
    # repeatable if that shuffle is deterministic as well.
    cv=KFold(n_splits=5, shuffle=True, random_state=0),
    random_state=0,
    verbose=3
)

opt.fit(xs, ys)

# One row per evaluated candidate, best rank first. Built in a single
# expression so re-running the cell never leaves a half-mutated frame behind.
df = pd.DataFrame(opt.cv_results_).sort_values(by='rank_test_score')

display(df)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END learning_rate=0.3959771611740126, min_child_samples=13, n_estimators=288, num_leaves=86, subsample=0.32759298913044044, subsample_freq=42;, score=0.843 total time=   2.5s
[CV 2/5] END learning_rate=0.3959771611740126, min_child_samples=13, n_estimators=288, num_leaves=86, subsample=0.32759298913044044, subsample_freq=42;, score=0.897 total time=   2.3s
[CV 3/5] END learning_rate=0.3959771611740126, min_child_samples=13, n_estimators=288, num_leaves=86, subsample=0.32759298913044044, subsample_freq=42;, score=0.889 total time=   2.9s
[CV 4/5] END learning_rate=0.3959771611740126, min_child_samples=13, n_estimators=288, num_leaves=86, subsample=0.32759298913044044, subsample_freq=42;, score=0.800 total time=   2.9s
[CV 5/5] END learning_rate=0.3959771611740126, min_child_samples=13, n_estimators=288, num_leaves=86, subsample=0.32759298913044044, subsample_freq=42;, score=0.878 total time=   2.5s
Fitting 5 folds for e

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_learning_rate,param_min_child_samples,param_n_estimators,param_num_leaves,param_subsample,param_subsample_freq,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
48,2.886614,0.073384,0.001872,0.000104,0.152959,30,221,268,0.864227,0,"{'learning_rate': 0.15295931213014233, 'min_ch...",0.935222,0.916293,0.918765,0.942445,0.919751,0.926495,0.010391,1
8,2.437226,0.048423,0.001839,0.00042,0.09266,31,221,18,0.972426,10,"{'learning_rate': 0.09265958191664066, 'min_ch...",0.93226,0.900588,0.92157,0.942919,0.931135,0.925695,0.01426,2
46,1.869224,0.070678,0.001543,0.000133,0.223144,36,171,34,0.826414,0,"{'learning_rate': 0.22314378688623554, 'min_ch...",0.938571,0.906739,0.928234,0.90946,0.917633,0.920128,0.011876,3
28,1.775446,0.062559,0.003015,0.002132,0.069115,13,190,10,0.46288,0,"{'learning_rate': 0.06911473862308588, 'min_ch...",0.91377,0.885119,0.922666,0.922841,0.953262,0.919532,0.021799,4
49,6.533745,0.120326,0.001933,0.000263,0.01,13,300,38,0.732692,0,"{'learning_rate': 0.01, 'min_child_samples': 1...",0.883872,0.914262,0.904164,0.943516,0.949068,0.918976,0.02442,5
40,1.839125,0.0594,0.001443,1.4e-05,0.028476,24,193,159,0.669056,20,"{'learning_rate': 0.02847633629147245, 'min_ch...",0.871088,0.900806,0.931179,0.945382,0.943005,0.918292,0.028442,6
21,2.492574,0.081206,0.003538,0.003563,0.024084,40,300,32,1.0,49,"{'learning_rate': 0.024083997278408653, 'min_c...",0.922362,0.909158,0.875675,0.953557,0.929583,0.918067,0.02564,7
19,1.238593,0.137552,0.001684,0.000477,0.29941,50,228,219,0.673271,26,"{'learning_rate': 0.29941015506259194, 'min_ch...",0.890233,0.952605,0.940635,0.933565,0.86949,0.917305,0.031856,8
14,2.204458,0.124033,0.001623,6.4e-05,0.065022,22,170,252,0.745049,45,"{'learning_rate': 0.06502235318860401, 'min_ch...",0.905453,0.927223,0.936663,0.856664,0.960521,0.917305,0.035096,9
38,2.048063,0.035917,0.001472,7.4e-05,0.5,20,99,300,1.0,0,"{'learning_rate': 0.49999999999999994, 'min_ch...",0.899674,0.929689,0.937888,0.93838,0.879476,0.917021,0.023506,10


In [None]:
# Give the LightGBM result columns shorter names for the summary table.
# Columns not listed here keep their original names.
ndf = df.rename(columns=dict(
    # param_boosting_type='type',
    param_learning_rate='learning_rate',
    param_num_leaves='n_leaves',
    param_n_estimators='n_estimators',
    param_min_child_samples='min_child_samples',
    param_subsample='subsample',
    param_subsample_freq='subsample_freq',
    rank_test_score='rank',
))

# First ten rows of the renamed frame, restricted to the searched parameters
# and the cross-validated score columns.
display(
    ndf.head(10)[[
        'learning_rate',
        'n_leaves',
        'n_estimators',
        'min_child_samples',
        'subsample',
        'subsample_freq',
        'mean_test_score',
        'std_test_score',
        'rank',
    ]]
)