In [0]:
import pickle

import numpy as np
from sklearn.model_selection import GridSearchCV, cross_val_score, KFold, train_test_split
from sklearn.svm import SVC

try:
    # Prefer the standalone joblib package; scikit-learn dropped its vendored copy in 0.23.
    import joblib
except ImportError:
    # Fallback for old environments that only ship the vendored copy.
    from sklearn.externals import joblib

In [0]:
# How many times the nested vs. non-nested comparison is repeated
# (each repetition reseeds the cross-validation shuffles).
NUM_TRIALS = 30

In [0]:
# Load the dataset
# SECURITY: pickle.load can execute arbitrary code - only load files you trust.
# Context managers make sure both file handles are closed after reading.
with open('/your/features', 'rb') as features_file:
    features = pickle.load(features_file)
with open('/your/labels', 'rb') as labels_file:
    labels = pickle.load(labels_file)

# Convert every feature value to float (the original comment indicates the
# values are stored as quoted strings); this yields a list of lists.
features1 = [[float(value) for value in row] for row in features]

features = np.asarray(features1)  # turn list back to array that can be parsed

# NOTE(review): the original line was `labels = sorted(labels1)`, but `labels1`
# is never defined (NameError on a fresh kernel). Sorting the labels on their
# own would also reorder them independently of the feature rows, so the labels
# are kept in their stored order here. If the pickled features really are
# stored in sorted-label order, confirm and restore the sort on `labels`.
labels = list(labels)

# Every feature row needs exactly one label before splitting.
assert len(features) == len(labels), (
    "features and labels differ in length: {0} vs {1}".format(len(features), len(labels))
)


In [0]:
# Hold out 20% of the samples; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
)

# Hyper-parameter search space: one sub-grid per kernel.
# The linear kernel is tuned over C only; the RBF kernel over C and gamma.
p_grid = [
    dict(kernel=["linear"], C=[1, 10, 100, 1000]),
    dict(kernel=["rbf"], C=[1, 10, 100, 1000], gamma=[1e-2, 1e-3, 1e-4, 1e-5]),
]

# Base estimator handed to the grid search: a Support Vector Classifier
# constructed with probability=True; kernel, C and gamma come from p_grid.
svm = SVC(probability=True)

In [0]:
# One slot per trial for each kind of score, zero-initialised
nested_scores = np.zeros(NUM_TRIALS)
non_nested_scores = np.zeros(NUM_TRIALS)

In [0]:
# Run the nested vs. non-nested comparison once per trial
for trial in range(NUM_TRIALS):

    # Both splitters are 4-fold shuffled KFolds seeded with the trial number.
    # Any other splitter (e.g. "GroupKFold", "LeaveOneOut", "LeaveOneGroupOut")
    # could be substituted here.
    fold_settings = dict(n_splits=4, shuffle=True, random_state=trial)
    inner_cv = KFold(**fold_settings)
    outer_cv = KFold(**fold_settings)

    # Non-nested: a single grid search over the training data; its best
    # cross-validated score is recorded for this trial.
    clf = GridSearchCV(estimator=svm, param_grid=p_grid, cv=inner_cv)
    clf.fit(X_train, y_train)
    non_nested_scores[trial] = clf.best_score_
    print("non-nested score : ", non_nested_scores[trial])

    # Nested: the grid search itself is scored across the outer folds and the
    # mean of those fold scores is recorded for this trial.
    outer_fold_scores = cross_val_score(clf, X=X_train, y=y_train, cv=outer_cv)
    nested_scores[trial] = outer_fold_scores.mean()
    print("nested score : ", nested_scores[trial])

# let us know the training outcome - so we don't have to do it again!
# NOTE: `clf` is the grid search fitted in the final loop iteration, so the
# parameters printed and the models saved below come from that last trial only.
print("\nBest parameters set:")
print(clf.best_params_)

# save the C-SVC training results for future use
# Both artefacts go to the same directory (the original wrote the estimator to
# '/you_dir/', a typo of the '/your_dir/' placeholder used on the next line).
joblib.dump(clf.best_estimator_, '/your_dir/svc_estimator.pkl')
joblib.dump(clf, '/your_dir/svc_clf.pkl')


# Per-trial gap between the non-nested and the nested score
score_difference = np.subtract(non_nested_scores, nested_scores)

# Report the mean and standard deviation of that gap across all trials
summary_template = "Average difference of {0:6f} with std. dev. of {1:6f}."
print(summary_template.format(np.mean(score_difference), np.std(score_difference)))