In [12]:
import pandas as pd
import numpy as np
from math import inf
from sklearn.model_selection import train_test_split
from sklearn.svm import NuSVR
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import MinMaxScaler
from joblib import dump #more efficient than pickle on objects that carry large numpy arrays internally


In [7]:
# Read the CSV and keep only its first 300 rows, then preview the result.
df = pd.read_csv('../data/mnist/all_4hp_rusty.csv').iloc[:300]
df.head()

Unnamed: 0,filters,1st_dense,lr,momentum,acc_0,acc_1,acc_2,acc_3,acc_4,acc_5,...,loss_2,loss_3,loss_4,loss_5,loss_6,loss_7,loss_8,loss_9,loss_10,loss_11
0,32,20,7.2e-05,0.038493,0.542643,0.734071,0.794643,0.826357,0.843929,0.855071,...,0.88497,0.751808,0.664132,0.601722,0.55586,0.521339,0.493939,0.471849,0.453848,0.438147
1,8,60,2.8e-05,0.416737,0.269214,0.419,0.514714,0.584214,0.643,0.694143,...,1.74757,1.584297,1.437934,1.308895,1.195554,1.096427,1.010322,0.935681,0.871148,0.815341
2,128,20,0.002175,0.831415,0.944357,0.962857,0.969357,0.972786,0.976714,0.973857,...,0.105777,0.090577,0.080609,0.085423,0.073039,0.068554,0.069195,0.06901,0.071025,0.072281
3,64,120,0.016359,0.174064,0.962143,0.9695,0.972143,0.978786,0.981071,0.982143,...,0.091528,0.069397,0.063511,0.059231,0.056331,0.05106,0.055757,0.056235,0.048868,0.056125
4,128,20,3.7e-05,0.032464,0.704214,0.802786,0.834786,0.856071,0.867929,0.88,...,0.699835,0.607823,0.543101,0.49829,0.466135,0.441281,0.421004,0.404069,0.389975,0.377923


In [8]:
# Table layout assumed by the slicing below: hyperparameter columns first,
# then `num_epochs` accuracy columns, then `num_epochs` loss columns.
num_epochs = 12
known_curve = 0.25  # fraction of each curve handed to the predictor as input
min_hp_idx = 0
#max_hp_idx = 5 #when using 6hp search space
max_hp_idx = 3  #when using 4hp search space

# The slices are positional, so an extra leading column would silently shift
# all of them by one. A CSV that was written together with its index comes back
# from read_csv with an "Unnamed: N" column; skip any such column first.
# NOTE(review): the rendered head shows an "Unnamed: 0" label - confirm whether
# it is a real column of `df` or just the displayed index.
data_cols = [c for c in df.columns if not str(c).startswith("Unnamed")]

n_known = int(num_epochs*known_curve)  # number of leading epochs used as features
acc_start = max_hp_idx + 1             # position of the first accuracy column
loss_start = acc_start + num_epochs    # position of the first loss column

hps = df[data_cols[min_hp_idx:max_hp_idx+1]].to_numpy()

# Known prefix of the accuracy curve and its final-epoch value.
accs = df[data_cols[acc_start:acc_start+n_known]].to_numpy()
target_acc = df[data_cols[loss_start-1]].to_numpy()

# Known prefix of the loss curve and its final-epoch value.
losses = df[data_cols[loss_start:loss_start+n_known]].to_numpy()
target_loss = df[data_cols[loss_start+num_epochs-1]].to_numpy()

In [9]:
#calculate finite differences of 1st and 2nd order
def finite_difs(curve):
    """Compute first- and second-order finite differences along each row.

    Parameters
    ----------
    curve : array-like of shape (n_curves, n_points)
        One curve per row.

    Returns
    -------
    difs1 : numpy.ndarray of shape (n_curves, max(n_points - 1, 0))
        ``curve[i][j] - curve[i][j-1]`` for every pair of consecutive points.
    difs2 : numpy.ndarray of shape (n_curves, max(n_points - 2, 0))
        Differences between consecutive entries of ``difs1``.
    """
    curve = np.asarray(curve)
    # np.diff replaces the nested Python loops and, unlike indexing the first
    # row to get the row length, also works when there are zero rows.
    difs1 = np.diff(curve, n=1, axis=1)
    difs2 = np.diff(difs1, n=1, axis=1)
    return difs1, difs2
#X = np.append(np.append(X,difs1,1),difs2,1)
#X.shape

In [10]:
# Loss prediction: the feature matrix is, column-wise, the hyperparameters,
# the known loss prefix, and its first- and second-order differences.
difs1, difs2 = finite_difs(losses)
X = np.concatenate((hps, losses, difs1, difs2), axis=1)
y = target_loss


In [13]:
# Min-max scale the features and the target (the target is reshaped to a
# column because the scaler expects 2-D input).
# NOTE(review): both scalers are fitted on all rows, before the train/test
# split in a later cell, so the test rows influence the scaling.
x_scaler = MinMaxScaler().fit(X)
y_scaler = MinMaxScaler().fit(y.reshape(-1, 1))
X = x_scaler.transform(X)
y = y_scaler.transform(y.reshape(-1, 1))
# Persist both scalers so another program can apply the same transforms
# when it uses the SVR.
dump(x_scaler,"x_scaler.joblib")
dump(y_scaler,"y_scaler.joblib")

['y_scaler.joblib']

In [905]:
# Hold out 20% of the rows as a test set; random_state is fixed so the split
# is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
print(X_train.shape, X_test.shape)

(240, 10) (60, 10)


In [906]:
# Instantiate the predictor with default settings and train it; the target is
# flattened to 1-D for fitting.
model = NuSVR()
model.fit(X_train, y_train.reshape(-1))

NuSVR()

In [None]:
# Save whatever estimator `model` currently refers to.
# `filename` is now actually passed to dump() instead of repeating the literal.
filename = "modelo.joblib"
dump(model, filename)

In [907]:
# Mean squared error on the held-out split (computed on the scaled target).
y_pred = model.predict(X_test)
mean_squared_error(y_true=y_test, y_pred=y_pred)

0.000575563741164598

In [908]:
# Coefficient of determination (R^2) on the held-out split.
model.score(X=X_test, y=y_test)

0.9728998874168934

In [909]:
# 5-fold cross-validated R^2 on the training split: per-fold scores, then
# their mean and standard deviation.
cvs = cross_val_score(estimator=model, X=X_train, y=y_train.ravel(), scoring='r2', cv=5)
cvs, np.mean(cvs), np.std(cvs)

(array([0.98500818, 0.28838295, 0.96011379, 0.94721253, 0.87933589]),
 0.8120106661175267,
 0.2641442384797953)

In [887]:
#try to optimize regressor hps: random search scored by mean 5-fold CV R^2
# (cross_val_score comes from the import cell at the top of the notebook)

N_TRIALS = 1000
C_RANGE = (1e-3, 10.0)  # C is sampled log-uniformly from this interval
SEARCH_SEED = 0
# Dedicated seeded generator so re-running the cell repeats the same search.
search_rng = np.random.default_rng(SEARCH_SEED)

gamma = "scale"  # kept fixed; only C and nu are searched
best, C_best, Nu_best, gamma_best = -inf, -inf, -inf, -inf
for _ in range(N_TRIALS):
    C = np.exp(search_rng.uniform(np.log(C_RANGE[0]), np.log(C_RANGE[1])))
    # uniform(0, 1) draws from [0, 1), but NuSVR documents nu as lying in
    # (0, 1]; 1 - u maps the draw onto that interval.
    Nu = 1.0 - search_rng.uniform(0, 1)
    # Loop-local names, so the notebook-level `model` and `cvs` are not
    # overwritten by throwaway candidates.
    candidate = NuSVR(C=C,nu=Nu,gamma=gamma)
    score = cross_val_score(candidate, X_train, y_train.ravel(), cv = 5, scoring='r2').mean()
    if best < score:
        best = score
        C_best, Nu_best, gamma_best = C, Nu, gamma
best, C_best, Nu_best, gamma_best

(0.7663090107089143, 7.3135497213543115, 0.8924509605431109, 'scale')

In [888]:
# Build the estimator from the best hyperparameters found by the search and
# fit it on the training split.
model = NuSVR(C=C_best, nu=Nu_best, gamma=gamma_best).fit(X_train, y_train.ravel())

# Mean 5-fold CV R^2 for the same configuration.
cvs = np.mean(cross_val_score(model, X_train, y_train.ravel(), cv=5, scoring='r2'))
cvs

0.7663090107089143

In [889]:
# Held-out MSE and R^2 of the tuned model (it was fitted in the previous cell).
y_pred = model.predict(X_test)
test_mse = mean_squared_error(y_test, y_pred)
test_r2 = model.score(X_test, y_test)
test_mse, test_r2

(0.0005593760485826635, 0.9531359618257825)