In [81]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle

from sklearn.preprocessing import normalize
from sklearn.model_selection import KFold
from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer
from IPython.display import display

# Load the pickled datapoints into `data`. Later cells iterate over its keys
# and read the 'form_factor' and 'thickness' fields of each entry.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# this file must come from a trusted source.
with open('datapoints_dict.pkl', 'rb') as f:
    data = pickle.load(f)

In [82]:
# Keep only the entries whose thickness label is present (i.e. not NaN).
# presumably `data` is a dict keyed by datapoint id -- verify against the pickle
labelled = [data[key] for key in data if not np.isnan(data[key]['thickness'])]

# Features: one form-factor row per labelled entry, rescaled row by row with
# sklearn's `normalize(..., norm='max', axis=1)`.
formfactors = np.array(
    normalize([entry['form_factor'] for entry in labelled], norm='max', axis=1)
)

# Targets: the matching thickness values, in the same order as the features.
thicknesses = np.array([entry['thickness'] for entry in labelled])

In [83]:
def shuffle_both(x, y, rng=None): # while keeping the (x, y) pairs matched
    """Shuffle two equally long sequences with one shared permutation.

    Parameters
    ----------
    x, y : numpy.ndarray, list or tuple
        Sequences of the same length. Lists and tuples are converted to
        arrays first, because integer-array indexing is not defined for them.
    rng : None, int or numpy.random.Generator, optional
        Source of randomness. ``None`` (the default) keeps the original
        behaviour of drawing from NumPy's global random state. Passing an int
        seed or a ``Generator`` makes the shuffle reproducible.

    Returns
    -------
    tuple
        ``(x_shuffled, y_shuffled)``; element ``i`` of both outputs comes
        from the same original position.

    Raises
    ------
    AssertionError
        If ``x`` and ``y`` do not have the same length.
    """
    # Only convert the containers that could not be fancy-indexed before, so
    # array inputs are passed through untouched.
    if isinstance(x, (list, tuple)):
        x = np.asarray(x)
    if isinstance(y, (list, tuple)):
        y = np.asarray(y)

    assert len(x) == len(y), "x and y must have the same length"

    if rng is None:
        # Legacy path: honours any prior np.random.seed(...) call.
        new_perm = np.random.permutation(len(x))
    else:
        # default_rng accepts a seed or returns an existing Generator as-is.
        new_perm = np.random.default_rng(rng).permutation(len(x))

    # The same index array is applied to both inputs, so pairs stay aligned.
    return x[new_perm], y[new_perm]

# Randomise the sample order once; xs holds the normalised form factors and
# ys the thicknesses, still aligned row for row.
xs, ys = shuffle_both(formfactors, thicknesses)

In [85]:
from sklearn.svm import SVR

# One seed shared by the CV splitter and the Bayesian optimiser, so that
# re-running this cell reproduces the same folds and the same candidates.
SEARCH_SEED = 0

# Search space for an RBF-kernel SVR.
SVR_params = {
    'kernel': Categorical(['rbf']),
    # NOTE(review): per the scikit-learn SVR docs `degree` is only used by the
    # 'poly' kernel, so with kernel='rbf' it should not influence the score.
    # It is kept because a later cell selects `param_degree` from the results.
    'degree': Integer(2, 10),
    #'gamma': Real(1e-6, 1e+1, prior='log-uniform'),
    'tol': Real(1e-7, 1e-1, prior='log-uniform'),
    'C': Real(1.0, 1e+4, prior='uniform'),
    'epsilon': Real(1e-6, 1e-1, prior='log-uniform')
}

# 5-fold shuffled cross-validation drives the Bayesian hyper-parameter search.
opt = BayesSearchCV(
    SVR(),
    SVR_params,
    cv=KFold(n_splits=5, shuffle=True, random_state=SEARCH_SEED),
    random_state=SEARCH_SEED,
    verbose=3
)

opt.fit(xs, ys)

# Results table, best-ranked candidate first. Built without inplace mutation
# so the cell stays idempotent.
df = pd.DataFrame(opt.cv_results_).sort_values(by='rank_test_score')

display(df)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END C=6114.47330133203, degree=5, epsilon=7.203064315094887e-06, kernel=rbf, tol=0.004007654057418518;, score=0.812 total time=   4.2s
[CV 2/5] END C=6114.47330133203, degree=5, epsilon=7.203064315094887e-06, kernel=rbf, tol=0.004007654057418518;, score=0.909 total time=   4.0s
[CV 3/5] END C=6114.47330133203, degree=5, epsilon=7.203064315094887e-06, kernel=rbf, tol=0.004007654057418518;, score=0.917 total time=   3.8s
[CV 4/5] END C=6114.47330133203, degree=5, epsilon=7.203064315094887e-06, kernel=rbf, tol=0.004007654057418518;, score=0.942 total time=   4.8s
[CV 5/5] END C=6114.47330133203, degree=5, epsilon=7.203064315094887e-06, kernel=rbf, tol=0.004007654057418518;, score=0.746 total time=   4.2s
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END C=5379.9544707431405, degree=7, epsilon=5.801278101335959e-05, kernel=rbf, tol=6.136756407613165e-06;, score=0.832 total time=  17.7s
[CV 2/5] END C=

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_degree,param_epsilon,param_kernel,param_tol,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
23,0.053722,0.005674,0.008353,0.000425,2253.05318,10,0.1,rbf,0.000774,"{'C': 2253.053179893947, 'degree': 10, 'epsilo...",0.937811,0.92184,0.94879,0.950285,0.933684,0.938482,0.01045,1
24,0.069388,0.008479,0.008876,0.000478,2591.321821,4,0.1,rbf,2.7e-05,"{'C': 2591.3218205891462, 'degree': 4, 'epsilo...",0.929251,0.960649,0.928357,0.937406,0.922189,0.935571,0.013441,2
35,0.100847,0.008801,0.008773,0.000762,6038.015369,9,0.1,rbf,1e-06,"{'C': 6038.015369294315, 'degree': 9, 'epsilon...",0.940457,0.949076,0.922374,0.944441,0.919144,0.935099,0.012065,3
25,0.091908,0.029473,0.008705,0.000665,1978.953911,4,0.1,rbf,3e-06,"{'C': 1978.9539114437055, 'degree': 4, 'epsilo...",0.942045,0.937408,0.926586,0.915559,0.948994,0.934118,0.011799,4
40,0.110622,0.019244,0.008801,0.000317,5531.39262,2,0.1,rbf,0.0,"{'C': 5531.39262001693, 'degree': 2, 'epsilon'...",0.948922,0.954086,0.90048,0.942386,0.920127,0.9332,0.020047,5
42,0.201114,0.010752,0.030335,0.002904,10000.0,2,0.030757,rbf,0.1,"{'C': 10000.0, 'degree': 2, 'epsilon': 0.03075...",0.959342,0.929177,0.912367,0.907589,0.95413,0.932521,0.021096,6
18,0.577857,0.074893,0.055913,0.009226,387.005632,2,1e-06,rbf,0.006734,"{'C': 387.00563225960013, 'degree': 2, 'epsilo...",0.95236,0.909936,0.899288,0.948425,0.944142,0.93083,0.021826,7
26,0.055603,0.001963,0.008684,0.000745,7183.761961,3,0.1,rbf,0.000495,"{'C': 7183.761960859248, 'degree': 3, 'epsilon...",0.925983,0.924208,0.931251,0.922649,0.949848,0.930788,0.009961,8
29,0.049074,0.007599,0.009311,0.000609,4936.031303,10,0.1,rbf,0.003505,"{'C': 4936.03130319571, 'degree': 10, 'epsilon...",0.906337,0.960712,0.923652,0.949714,0.91312,0.930707,0.021045,9
48,0.041304,0.002884,0.010318,0.001243,10000.0,4,0.1,rbf,0.04062,"{'C': 10000.0, 'degree': 4, 'epsilon': 0.1, 'k...",0.907525,0.918552,0.942981,0.940853,0.942354,0.930453,0.014657,10


In [91]:
# First ten rows of the rank-sorted results table, reduced to the searched
# parameters and the score summary.
# NOTE(review): `df` is reused by the later searches, so this shows SVR results
# only if the SVR search cell was the last one to assign it.
display(
    df.head(10)[[
        'param_degree',
        'param_epsilon',
        'param_tol',
        'param_C',
        'mean_test_score',
        'std_test_score',
        'rank_test_score',
    ]]
)

Unnamed: 0,param_degree,param_epsilon,param_tol,param_C,mean_test_score,std_test_score,rank_test_score
23,10,0.1,0.000774,2253.05318,0.938482,0.01045,1
24,4,0.1,2.7e-05,2591.321821,0.935571,0.013441,2
35,9,0.1,1e-06,6038.015369,0.935099,0.012065,3
25,4,0.1,3e-06,1978.953911,0.934118,0.011799,4
40,2,0.1,0.0,5531.39262,0.9332,0.020047,5
42,2,0.030757,0.1,10000.0,0.932521,0.021096,6
18,2,1e-06,0.006734,387.005632,0.93083,0.021826,7
26,3,0.1,0.000495,7183.761961,0.930788,0.009961,8
29,10,0.1,0.003505,4936.031303,0.930707,0.021045,9
48,4,0.1,0.04062,10000.0,0.930453,0.014657,10


In [99]:
from sklearn.svm import NuSVR

# One seed shared by the CV splitter and the Bayesian optimiser, so that
# re-running this cell reproduces the same folds and the same candidates.
SEARCH_SEED = 0

# Search space for NuSVR; the kernel is left at the estimator's default.
NuSVR_params = {
    'nu': Real(1e-3, 1.0, prior='log-uniform'),
    #'kernel': Categorical(['rbf']),
    # NOTE(review): per the scikit-learn docs `degree` is only used by the
    # 'poly' kernel; presumably the default kernel here is 'rbf', in which case
    # this dimension has no effect -- confirm. It is kept because a later cell
    # selects `param_degree` from the results.
    'degree': Integer(1, 10),
    #'gamma': Real(1e-6, 1e+1, prior='log-uniform'),
    'tol': Real(1e-7, 1e-1, prior='log-uniform'),
    'C': Real(1.0, 1e+4, prior='uniform')
}

# 5-fold shuffled cross-validation drives the Bayesian hyper-parameter search.
opt = BayesSearchCV(
    NuSVR(),
    NuSVR_params,
    cv=KFold(n_splits=5, shuffle=True, random_state=SEARCH_SEED),
    random_state=SEARCH_SEED,
    verbose=3
)

opt.fit(xs, ys)

# Results table, best-ranked candidate first. Built without inplace mutation
# so the cell stays idempotent.
df = pd.DataFrame(opt.cv_results_).sort_values(by='rank_test_score')

display(df)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END C=8416.736356945932, degree=4, kernel=rbf, nu=0.006052094819500271, tol=0.012023616634294522;, score=0.917 total time=   0.4s
[CV 2/5] END C=8416.736356945932, degree=4, kernel=rbf, nu=0.006052094819500271, tol=0.012023616634294522;, score=0.920 total time=   0.3s
[CV 3/5] END C=8416.736356945932, degree=4, kernel=rbf, nu=0.006052094819500271, tol=0.012023616634294522;, score=0.933 total time=   0.3s
[CV 4/5] END C=8416.736356945932, degree=4, kernel=rbf, nu=0.006052094819500271, tol=0.012023616634294522;, score=0.940 total time=   0.3s
[CV 5/5] END C=8416.736356945932, degree=4, kernel=rbf, nu=0.006052094819500271, tol=0.012023616634294522;, score=0.949 total time=   0.3s
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END C=9152.487485363195, degree=7, kernel=rbf, nu=0.02074112963436873, tol=3.0943526495795276e-05;, score=0.888 total time=   4.1s
[CV 2/5] END C=9152.487485363195, degree=7, ker

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_degree,param_kernel,param_nu,param_tol,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
29,0.19356,0.009831,0.012771,0.000648,1934.915705,1,rbf,0.019071,0.025179,"{'C': 1934.9157048137286, 'degree': 1, 'kernel...",0.943208,0.95311,0.929274,0.924704,0.955967,0.941253,0.012476,1
28,3.693631,0.386505,0.014264,0.000398,10000.0,4,rbf,0.009306,1e-06,"{'C': 10000.0, 'degree': 4, 'kernel': 'rbf', '...",0.934052,0.942922,0.914635,0.959027,0.950068,0.940141,0.01517,2
34,1.780611,0.098534,0.01255,0.000245,1255.797927,1,rbf,0.040076,0.0,"{'C': 1255.7979265397707, 'degree': 1, 'kernel...",0.931743,0.950445,0.898872,0.955556,0.956799,0.938683,0.021837,3
49,0.263334,0.042163,0.012005,0.000769,10000.0,2,rbf,0.002663,0.000659,"{'C': 10000.0, 'degree': 2, 'kernel': 'rbf', '...",0.947301,0.929736,0.918618,0.924628,0.962874,0.936631,0.01624,4
19,4.159748,0.418924,0.015348,0.000288,6852.587036,6,rbf,0.012479,0.0,"{'C': 6852.587036026188, 'degree': 6, 'kernel'...",0.925026,0.920211,0.949902,0.930354,0.952719,0.935642,0.013219,5
23,1.370627,0.117991,0.011941,0.000623,10000.0,6,rbf,0.00366,0.0,"{'C': 10000.0, 'degree': 6, 'kernel': 'rbf', '...",0.940772,0.912567,0.94452,0.930095,0.949786,0.935548,0.013179,6
36,0.668746,0.070341,0.011231,0.00062,1455.085595,7,rbf,0.015258,0.0,"{'C': 1455.0855949676209, 'degree': 7, 'kernel...",0.927537,0.954883,0.92007,0.931166,0.94031,0.934793,0.011972,7
7,0.419689,0.062542,0.010735,0.001856,2362.493055,5,rbf,0.008242,3e-06,"{'C': 2362.493055352354, 'degree': 5, 'kernel'...",0.941372,0.956456,0.897645,0.934877,0.940859,0.934242,0.019636,8
24,1.213612,0.149046,0.011019,0.0006,5993.561293,2,rbf,0.00553,0.0,"{'C': 5993.561292845297, 'degree': 2, 'kernel'...",0.946371,0.963666,0.927012,0.91858,0.915389,0.934204,0.018253,9
48,0.138194,0.02346,0.013167,0.001623,4986.337104,7,rbf,0.004225,0.024717,"{'C': 4986.337104013021, 'degree': 7, 'kernel'...",0.945758,0.909521,0.938139,0.940928,0.935726,0.934014,0.012693,10


In [100]:
# First ten rows of the rank-sorted results table, reduced to the searched
# parameters and the score summary.
# NOTE(review): `df` is reused by the other searches, so this shows NuSVR
# results only if the NuSVR search cell was the last one to assign it.
display(
    df.head(10)[[
        'param_nu',
        'param_degree',
        'param_tol',
        'param_C',
        'mean_test_score',
        'std_test_score',
        'rank_test_score',
    ]]
)

Unnamed: 0,param_nu,param_degree,param_tol,param_C,mean_test_score,std_test_score,rank_test_score
29,0.019071,1,0.025179,1934.915705,0.941253,0.012476,1
28,0.009306,4,1e-06,10000.0,0.940141,0.01517,2
34,0.040076,1,0.0,1255.797927,0.938683,0.021837,3
49,0.002663,2,0.000659,10000.0,0.936631,0.01624,4
19,0.012479,6,0.0,6852.587036,0.935642,0.013219,5
23,0.00366,6,0.0,10000.0,0.935548,0.013179,6
36,0.015258,7,0.0,1455.085595,0.934793,0.011972,7
7,0.008242,5,3e-06,2362.493055,0.934242,0.019636,8
24,0.00553,2,0.0,5993.561293,0.934204,0.018253,9
48,0.004225,7,0.024717,4986.337104,0.934014,0.012693,10


In [101]:
import lightgbm as lgbm

# One seed shared by the CV splitter and the Bayesian optimiser, so that
# re-running this cell reproduces the same folds and the same candidates.
SEARCH_SEED = 0

# Search space for the LightGBM regressor.
LGBM_params = {
    'boosting_type': Categorical(['gbdt', 'dart']),
    'num_leaves': Integer(10, 300),
    #'max_depth': Integer(), # or just -1
    'learning_rate': Real(1e-2, 0.5, prior='log-uniform'),
    'n_estimators': Integer(10, 300),
    'min_child_samples': Integer(1, 50),
    'subsample': Real(1e-2, 1.0, prior='uniform'),
    'subsample_freq': Integer(0, 50)
}

# 5-fold shuffled cross-validation drives the Bayesian hyper-parameter search.
opt = BayesSearchCV(
    lgbm.LGBMRegressor(),
    LGBM_params,
    cv=KFold(n_splits=5, shuffle=True, random_state=SEARCH_SEED),
    random_state=SEARCH_SEED,
    verbose=3
)

opt.fit(xs, ys)

# Results table, best-ranked candidate first. Built without inplace mutation
# so the cell stays idempotent.
df = pd.DataFrame(opt.cv_results_).sort_values(by='rank_test_score')

display(df)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END boosting_type=dart, learning_rate=0.02013926474204215, min_child_samples=19, n_estimators=31, num_leaves=186, subsample=0.7039686835532514, subsample_freq=12;, score=0.500 total time=   0.5s
[CV 2/5] END boosting_type=dart, learning_rate=0.02013926474204215, min_child_samples=19, n_estimators=31, num_leaves=186, subsample=0.7039686835532514, subsample_freq=12;, score=0.534 total time=   0.4s
[CV 3/5] END boosting_type=dart, learning_rate=0.02013926474204215, min_child_samples=19, n_estimators=31, num_leaves=186, subsample=0.7039686835532514, subsample_freq=12;, score=0.575 total time=   0.5s
[CV 4/5] END boosting_type=dart, learning_rate=0.02013926474204215, min_child_samples=19, n_estimators=31, num_leaves=186, subsample=0.7039686835532514, subsample_freq=12;, score=0.507 total time=   0.4s
[CV 5/5] END boosting_type=dart, learning_rate=0.02013926474204215, min_child_samples=19, n_estimators=31, num_leaves=186, su

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_boosting_type,param_learning_rate,param_min_child_samples,param_n_estimators,param_num_leaves,param_subsample,param_subsample_freq,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
1,3.479453,0.270444,0.001495,0.000124,gbdt,0.075651,23,214,158,0.998188,24,"{'boosting_type': 'gbdt', 'learning_rate': 0.0...",0.937425,0.937337,0.916494,0.951838,0.93378,0.935375,0.011297,1
24,59.073031,3.54742,0.009744,0.010539,gbdt,0.017895,1,267,275,0.669616,6,"{'boosting_type': 'gbdt', 'learning_rate': 0.0...",0.940906,0.934641,0.923348,0.940357,0.927735,0.933398,0.00692,2
26,3.193141,0.617889,0.011558,0.015585,gbdt,0.410375,35,202,300,1.0,32,"{'boosting_type': 'gbdt', 'learning_rate': 0.4...",0.892076,0.930759,0.925185,0.921254,0.945778,0.92301,0.017568,3
48,1.905794,0.127109,0.001288,1.9e-05,gbdt,0.072151,1,139,10,0.319675,0,"{'boosting_type': 'gbdt', 'learning_rate': 0.0...",0.934454,0.937996,0.883768,0.921146,0.934351,0.922343,0.020126,4
45,2.958057,0.117022,0.001797,0.000123,gbdt,0.040508,32,300,300,0.674426,3,"{'boosting_type': 'gbdt', 'learning_rate': 0.0...",0.907575,0.925658,0.913381,0.929555,0.927539,0.920742,0.008667,5
8,3.585017,0.346115,0.001881,0.000617,gbdt,0.017644,18,222,281,0.666773,20,"{'boosting_type': 'gbdt', 'learning_rate': 0.0...",0.919288,0.933342,0.911254,0.913179,0.91955,0.919323,0.007739,6
39,0.377489,0.030341,0.000961,2.1e-05,gbdt,0.5,17,10,127,0.449061,0,"{'boosting_type': 'gbdt', 'learning_rate': 0.4...",0.917513,0.896736,0.909614,0.941835,0.929497,0.919039,0.015605,7
13,3.647284,0.141488,0.004113,0.004918,gbdt,0.015627,15,219,285,0.676691,18,"{'boosting_type': 'gbdt', 'learning_rate': 0.0...",0.919103,0.933789,0.864816,0.920822,0.933212,0.914348,0.025502,8
38,0.298269,0.036429,0.001029,5.3e-05,gbdt,0.5,1,16,10,0.317416,0,"{'boosting_type': 'gbdt', 'learning_rate': 0.4...",0.896224,0.900569,0.92987,0.926744,0.902848,0.911251,0.014122,9
15,2.636475,0.063504,0.001582,0.000158,gbdt,0.014356,26,216,21,0.94441,15,"{'boosting_type': 'gbdt', 'learning_rate': 0.0...",0.928754,0.904491,0.888383,0.921505,0.911076,0.910842,0.013995,10


In [105]:
# Give the LightGBM result columns shorter names (stored as `ndf`), then show
# the first ten rows of the searched parameters and the score summary.
# NOTE(review): relies on `df` still holding the LightGBM search results.
ndf = df.rename(columns=dict(
    param_boosting_type='type',
    param_learning_rate='learning_rate',
    param_num_leaves='n_leaves',
    param_n_estimators='n_estimators',
    param_min_child_samples='min_child_samples',
    param_subsample='subsample',
    param_subsample_freq='subsample_freq',
    rank_test_score='rank',
))
display(
    ndf.head(10)[[
        'type',
        'learning_rate',
        'n_leaves',
        'n_estimators',
        'min_child_samples',
        'subsample',
        'subsample_freq',
        'mean_test_score',
        'std_test_score',
        'rank',
    ]]
)

Unnamed: 0,type,learning_rate,n_leaves,n_estimators,min_child_samples,subsample,subsample_freq,mean_test_score,std_test_score,rank
1,gbdt,0.075651,158,214,23,0.998188,24,0.935375,0.011297,1
24,gbdt,0.017895,275,267,1,0.669616,6,0.933398,0.00692,2
26,gbdt,0.410375,300,202,35,1.0,32,0.92301,0.017568,3
48,gbdt,0.072151,10,139,1,0.319675,0,0.922343,0.020126,4
45,gbdt,0.040508,300,300,32,0.674426,3,0.920742,0.008667,5
8,gbdt,0.017644,281,222,18,0.666773,20,0.919323,0.007739,6
39,gbdt,0.5,127,10,17,0.449061,0,0.919039,0.015605,7
13,gbdt,0.015627,285,219,15,0.676691,18,0.914348,0.025502,8
38,gbdt,0.5,10,16,1,0.317416,0,0.911251,0.014122,9
15,gbdt,0.014356,21,216,26,0.94441,15,0.910842,0.013995,10
