In [1]:
from statistics import mean
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_moons
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
import numpy as np

In [3]:
# Synthetic dataset: 5000 samples x 20 features drawn uniformly from [0, 1),
# with labels drawn at random from {0, 1}.
# A locally seeded generator is used instead of the unseeded global
# np.random state, so the generated data is identical on every run.
rng = np.random.default_rng(42)
x = rng.random((5000, 20))
y = rng.integers(0, 2, 5000)  # upper bound is exclusive -> labels are 0 or 1

# Hold out 33% of the samples as the ensemble validation set. The split is
# shuffled, stratified on the labels and seeded.
x_train, x_val, y_train, y_val = train_test_split(
    x,
    y,
    stratify=y,
    shuffle=True,
    random_state=42,
    test_size=0.33,
)

# Seeded, shuffled 5-fold stratified splitter.
skf = StratifiedKFold(shuffle=True, random_state=42, n_splits=5)

In [4]:
# Seed shared by every estimator below that draws random numbers. Fixing it
# removes estimator-level randomness as a source of run-to-run variation in
# the fitted models and their scores.
RANDOM_STATE = 42

# Candidate base classifiers. KNeighborsClassifier, GaussianNB and
# QuadraticDiscriminantAnalysis take no random_state argument.
classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025, probability=True, random_state=RANDOM_STATE),
    SVC(gamma=2, C=1, probability=True, random_state=RANDOM_STATE),
    GaussianProcessClassifier(1.0 * RBF(1.0), random_state=RANDOM_STATE),
    DecisionTreeClassifier(max_depth=5, random_state=RANDOM_STATE),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1, random_state=RANDOM_STATE),
    MLPClassifier(alpha=1, max_iter=1000, random_state=RANDOM_STATE),
    AdaBoostClassifier(random_state=RANDOM_STATE),
    GaussianNB(),
    QuadraticDiscriminantAnalysis(),
]

f1_scores = []  # mean cross-validated F1 per classifier, in the order of `classifiers`
trained_classifiers = []  # the same estimator objects after the final fit on all of x_train
for classifier in classifiers:
    print(classifier.__class__)
    fold_f1_scores = []
    # Score the classifier on each held-out fold of the training set.
    for index, (train_index, test_index) in enumerate(skf.split(x_train, y_train)):
        print(f"Fold {index}...")
        x_train_fold, x_test_fold = x_train[train_index], x_train[test_index]
        y_train_fold, y_test_fold = y_train[train_index], y_train[test_index]
        classifier.fit(x_train_fold, y_train_fold)
        y_predicted = classifier.predict(x_test_fold)
        fold_f1_scores.append(f1_score(y_test_fold, y_predicted))
    # Final fit on the whole training set; this is the model that gets stored.
    classifier.fit(x_train, y_train)
    trained_classifiers.append(classifier)
    f1_scores.append(mean(fold_f1_scores))

<class 'sklearn.neighbors._classification.KNeighborsClassifier'>
Fold 0...
Fold 1...
Fold 2...
Fold 3...
Fold 4...
<class 'sklearn.svm._classes.SVC'>
Fold 0...
Fold 1...
Fold 2...
Fold 3...
Fold 4...
<class 'sklearn.svm._classes.SVC'>
Fold 0...
Fold 1...
Fold 2...
Fold 3...
Fold 4...
<class 'sklearn.gaussian_process._gpc.GaussianProcessClassifier'>
Fold 0...
Fold 1...
Fold 2...
Fold 3...
Fold 4...




<class 'sklearn.tree._classes.DecisionTreeClassifier'>
Fold 0...
Fold 1...
Fold 2...
Fold 3...
Fold 4...
<class 'sklearn.ensemble._forest.RandomForestClassifier'>
Fold 0...
Fold 1...
Fold 2...
Fold 3...
Fold 4...
<class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>
Fold 0...
Fold 1...
Fold 2...
Fold 3...
Fold 4...
<class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>
Fold 0...
Fold 1...
Fold 2...
Fold 3...
Fold 4...
<class 'sklearn.naive_bayes.GaussianNB'>
Fold 0...
Fold 1...
Fold 2...
Fold 3...
Fold 4...
<class 'sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis'>
Fold 0...
Fold 1...
Fold 2...
Fold 3...
Fold 4...


In [5]:
import os
import pickle

# Create the output directory if needed so the writes below do not fail
# with FileNotFoundError.
os.makedirs('data', exist_ok=True)

# Persist the fitted classifiers, their mean cross-validation F1 scores and
# the held-out validation split. Each file is opened in a `with` block so
# the handle is flushed and closed as soon as the dump finishes.
# NOTE: pickle files can execute arbitrary code when loaded; only load these
# artifacts from a trusted location.
with open('data/classifiers.p', 'wb') as classifiers_file:
    pickle.dump(trained_classifiers, classifiers_file)
with open('data/f1_scores.p', 'wb') as f1_scores_file:
    pickle.dump(f1_scores, f1_scores_file)
with open('data/val.p', 'wb') as val_file:
    pickle.dump((x_val, y_val), val_file)