In [None]:
from sklearn.model_selection import cross_val_score, train_test_split, GridSearchCV
from sklearn.metrics import roc_auc_score
from sklearn.svm import SVC
from sklearn.datasets import load_digits

# Using Cross-Validation

In [None]:
# Load the digits data and recast it as a binary problem: a sample is
# labelled True when its digit is 1 and False for every other digit.
dataset = load_digits()
X = dataset.data
y = dataset.target == 1

# Support vector classifier with a linear kernel and C=1.
clf = SVC(kernel='linear', C=1)

# Each entry pairs a report line with the `scoring` argument handed to
# cross_val_score. None leaves the scorer at its default, i.e. the
# estimator's own score method (accuracy for this classifier).
_cv_reports = (
    ("cross validation (accuracy): {}", None),
    ("cross validation (AUC): {}", 'roc_auc'),
    ("cross validation (recall): {}", 'recall'),
)

# Run 5-fold cross-validation once per metric and print the per-fold scores.
for _template, _scoring in _cv_reports:
    _fold_scores = cross_val_score(clf, X, y, cv=5, scoring=_scoring)
    print(_template.format(_fold_scores))

# Using GridSearchCV

In [None]:
# Grid search tunes parameters methodically: one model is built and
# evaluated for every combination of the candidate values listed in a grid.

# Hold out a test split; random_state=0 keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Candidate gamma values to try with the RBF-kernel classifier.
grid_values = {'gamma': [0.001, 0.01, 0.05, 0.1, 1, 10, 100]}
clf = SVC(kernel='rbf')

# No `scoring` is passed, so candidates are compared with the estimator's
# default score (accuracy for a classifier).
grid_clf_acc = GridSearchCV(estimator=clf, param_grid=grid_values)
grid_clf_acc.fit(X_train, y_train)

# Decision-function values on the held-out test data, as produced by the
# fitted search object.
y_decision_fn_scores_acc = grid_clf_acc.decision_function(X_test)

# Report the gamma that won the search ...
print(
    "grid best parameter (max accuracy): {}".format(grid_clf_acc.best_params_)
)
# ... and the cross-validated accuracy it achieved during the search.
print(
    "grid best score (accuracy): {}".format(grid_clf_acc.best_score_)
)

In [None]:
# Repeat the gamma search over grid_values, this time ranking the
# candidates by ROC AUC instead of the default score.
grid_clf_auc = GridSearchCV(
    estimator=clf,
    param_grid=grid_values,
    scoring='roc_auc',
)
grid_clf_auc.fit(X_train, y_train)

# Decision-function values on the held-out test data; these serve as the
# ranking scores for the test-set AUC below.
y_decision_fn_scores_auc = grid_clf_auc.decision_function(X_test)

# AUC measured on the held-out test set.
print(
    "test set AUC:: {}".format(
        roc_auc_score(y_true=y_test, y_score=y_decision_fn_scores_auc)
    )
)
# Gamma value that won the AUC-driven search.
print(
    "grid best parameter (max AUC): {}".format(grid_clf_auc.best_params_)
)
# Cross-validated AUC achieved by that gamma during the search.
print(
    "grid best score (AUC): {}".format(grid_clf_auc.best_score_)
)