# Testing the Optunity framework


In [13]:
import optunity
import optunity.metrics
import sklearn.svm
import numpy as np

### SVM example

In [14]:
from sklearn.datasets import load_digits

# Fixed seed for the synthetic noise below, so the dataset is identical on every
# Restart & Run All. This only pins the noise; randomness inside optunity
# (fold assignment, solver) is not controlled here.
NOISE_SEED = 0
# Multiplier applied to the standard-normal noise added to every feature.
NOISE_SCALE = 5

digits = load_digits()
n = digits.data.shape[0]

# Binary task: separate one digit class (positive) from another (negative).
positive_digit = 8
negative_digit = 9

# Row indices of each class, in ascending order. They are kept as plain Python
# lists because they are concatenated with `+` below; on NumPy arrays `+`
# would add element-wise instead of concatenating.
positive_idx = np.flatnonzero(digits.target == positive_digit).tolist()
negative_idx = np.flatnonzero(digits.target == negative_digit).tolist()

# add some noise to the data to make it a little challenging
original_data = digits.data[positive_idx + negative_idx, ...]
noise_rng = np.random.RandomState(NOISE_SEED)
data = original_data + NOISE_SCALE * noise_rng.randn(*original_data.shape)
# Labels follow the row order of `data`: all positives first, then all negatives.
labels = [True] * len(positive_idx) + [False] * len(negative_idx)

In [16]:
# Build the cross-validation decorator a single time so that the later tuning task can
# reuse it; sharing one decorator means both objectives are evaluated on the same folds.
cv_decorator = optunity.cross_validated(x=data, y=labels, num_folds=5)

def svm_rbf_tuned_auroc(x_train, y_train, x_test, y_test, C, logGamma):
    """Fit an SVC on the training fold and return its AUROC on the test fold.

    No kernel is passed, so SVC uses its default kernel. The kernel width is
    parameterized on a log10 scale: gamma = 10 ** logGamma.
    """
    classifier = sklearn.svm.SVC(C=C, gamma=10 ** logGamma)
    classifier.fit(x_train, y_train)
    scores = classifier.decision_function(x_test)
    return optunity.metrics.roc_auc(y_test, scores)

# Apply the decorator by hand; this has the same effect as writing
# @optunity.cross_validated(x=data, y=labels, num_folds=5)
# directly above the function definition.
svm_rbf_tuned_auroc = cv_decorator(svm_rbf_tuned_auroc)

# Quick check of the wrapped objective: only the hyperparameters are passed here.
svm_rbf_tuned_auroc(C=1.0, logGamma=0.0)

0.5

In [17]:
# Search C in [0, 10] and logGamma in [-5, 0] (gamma = 10 ** logGamma) with 150 evaluations.
optimal_rbf_pars, info, _ = optunity.maximize(
    svm_rbf_tuned_auroc,
    num_evals=150,
    C=[0, 10],
    logGamma=[-5, 0],
)
# Outside of IPython the evaluations can be parallelized via optunity.pmap:
# optimal_rbf_pars, _, _ = optunity.maximize(svm_rbf_tuned_auroc, 150, C=[0, 10], logGamma=[-5, 0], pmap=optunity.pmap)

print("Optimal parameters: " + str(optimal_rbf_pars))
print("AUROC of tuned SVM with RBF kernel: %1.3f" % info.optimum)

Optimal parameters: {'C': 4.818033854166667, 'logGamma': -3.2733450877464594}
AUROC of tuned SVM with RBF kernel: 0.991


In [19]:
# Inspect the raw evaluation log of the search; its 'args' entry maps each
# hyperparameter name to the list of values that were tried.
info.call_log

{'args': {'C': [6.4013671875,
   8.9013671875,
   3.9013671875,
   2.6513671875,
   7.6513671875,
   5.1513671875,
   0.1513671875,
   0.1123046875,
   5.1123046875,
   7.6123046875,
   6.785143647555586,
   8.435279069524311,
   4.368033854166667,
   3.0910005947015646,
   8.118033854166667,
   5.048239938668534,
   0.6180338541666666,
   0.4993730650338141,
   5.578971354166667,
   8.078971354166667,
   7.2518103142222525,
   8.604532075990617,
   4.834700520833334,
   3.5576672613682314,
   8.584700520833334,
   5.5149066053352005,
   1.0847005208333331,
   0.9660397317004807,
   6.045638020833334,
   8.545638020833334,
   7.718476980888919,
   9.06181028443656,
   5.3013671875,
   4.024333928034898,
   9.0513671875,
   5.981573272001867,
   1.5513671874999997,
   1.4327063983671473,
   6.5123046875,
   9.0123046875,
   8.185143647555586,
   9.519088492882501,
   5.768033854166667,
   4.491000594701565,
   9.518033854166667,
   6.448239938668534,
   2.0180338541666663,
   1.89937306

In [20]:
import pandas  # NOTE(review): not referenced directly; presumably required by call_log2dataframe — confirm
# Convert the call log of the search into a DataFrame for easier inspection.
df = optunity.call_log2dataframe(info.call_log)

In [21]:
# Preview the first evaluations: one row per call, with the hyperparameters tried and the resulting value.
df.head()

Unnamed: 0,C,logGamma,value
0,6.401367,-2.072754,0.928839
1,8.901367,-3.322754,0.991176
2,3.901367,-0.822754,0.5
3,2.651367,-2.697754,0.983438
4,7.651367,-0.197754,0.5


In [22]:
# Conditional search space for the structured tuning run: the top-level 'kernel'
# choice selects which hyperparameters exist, and each hyperparameter is given
# as a [lower, upper] box constraint.
space = {
    'kernel': {
        # linear kernel: only the regularization constant is tuned
        'linear': {'C': [0, 2]},
        # RBF kernel: kernel width on a log10 scale plus regularization
        'rbf': {'logGamma': [-5, 0], 'C': [0, 10]},
        # polynomial kernel: degree, regularization and the independent term
        'poly': {'degree': [2, 5], 'C': [0, 5], 'coef0': [0, 2]},
    },
}

In [23]:
def train_model(x_train, y_train, kernel, C, logGamma, degree, coef0):
    """Train an SVM classifier using only the hyperparameters relevant to ``kernel``.

    Parameters
    ----------
    x_train, y_train
        Training data and labels, passed unchanged to ``SVC.fit``.
    kernel : str
        One of ``'linear'``, ``'poly'`` or ``'rbf'``.
    C
        Regularization parameter; used by every kernel.
    logGamma
        Base-10 logarithm of ``gamma``; only read when ``kernel == 'rbf'``.
    degree, coef0
        Polynomial kernel settings; only read when ``kernel == 'poly'``.

    Returns
    -------
    The fitted ``sklearn.svm.SVC`` model.

    Raises
    ------
    ValueError
        If ``kernel`` is not one of the supported names.
    """
    # Collect the kernel-specific keyword arguments first, so that hyperparameters
    # belonging to other kernels (which may be None) are never touched.
    if kernel == 'linear':
        kernel_params = {}
    elif kernel == 'poly':
        kernel_params = {'degree': degree, 'coef0': coef0}
    elif kernel == 'rbf':
        kernel_params = {'gamma': 10 ** logGamma}
    else:
        # ValueError is the built-in exception for a bad argument value;
        # `ArgumentError` is not a defined name and would surface as a NameError.
        raise ValueError("Unknown kernel function: %s" % kernel)

    model = sklearn.svm.SVC(kernel=kernel, C=C, **kernel_params)
    model.fit(x_train, y_train)
    return model

def svm_tuned_auroc(x_train, y_train, x_test, y_test, kernel='linear', C=0, logGamma=0, degree=0, coef0=0):
    """Return the test-fold AUROC of an SVM trained with the given kernel and hyperparameters.

    All hyperparameters are forwarded to ``train_model``, which only reads the
    ones that apply to the chosen kernel.
    """
    fitted = train_model(x_train, y_train, kernel, C, logGamma, degree, coef0)
    scores = fitted.decision_function(x_test)
    auroc = optunity.metrics.roc_auc(y_test, scores)
    return auroc

# Wrap with the same cross-validation decorator used for the RBF-only objective.
svm_tuned_auroc = cv_decorator(svm_tuned_auroc)

In [24]:
# Tune the kernel choice and its kernel-specific hyperparameters jointly over `space`.
# NOTE: this rebinds `info`, replacing the result object of the earlier RBF-only search.
optimal_svm_pars, info, _ = optunity.maximize_structured(svm_tuned_auroc, space, num_evals=150)
# ": " separates the label from the dict, matching the RBF-only report.
print("Optimal parameters: " + str(optimal_svm_pars))
print("AUROC of tuned SVM: %1.3f" % info.optimum)

Optimal parameters{'C': 5.585286458333334, 'coef0': None, 'kernel': 'rbf', 'logGamma': -3.4089497795847614, 'degree': None}
AUROC of tuned SVM: 0.988


In [27]:
# Rebuild the call-log DataFrame from the current `info` and list the evaluations best-first.
df = optunity.call_log2dataframe(info.call_log)
df.sort_values(by='value', ascending=False)

Unnamed: 0,C,coef0,degree,kernel,logGamma,value
123,6.744743,,,rbf,-3.403342,0.988292
54,5.585286,,,rbf,-3.408950,0.988292
114,5.457505,,,rbf,-3.483555,0.988292
82,5.735286,,,rbf,-3.485319,0.988292
142,5.196762,,,rbf,-3.508407,0.988133
71,7.468620,,,rbf,-3.474154,0.988128
88,7.447381,,,rbf,-3.473112,0.988128
112,5.742588,,,rbf,-3.465694,0.988128
60,8.144259,,,rbf,-3.382791,0.987969
118,6.047381,,,rbf,-3.381222,0.987969
