# Import dependencies

In [1]:
import numpy as np
import pickle
import pandas as pd
import joblib

# Trajectories with mixed temporal resolutions

## Load classifier and data

In [87]:
def load_classifier_and_data(model, input_vector):
    """
    Loads the trained classifier and the training + test data for the
    trajectories with mixed temporal resolutions.

    Arguments:
    model -- which classifier to use ("RF", "GB", "LR", "SVM")
    input_vector -- use high-dimensional ("high-dim") or low-dimensional ("low-dim") input vectors

    Returns:
    clf -- trained classifier
    X_train -- data on which classifier was trained
    X_test -- unseen test data for evaluation
    y_train_binary -- labels of train data
    y_test_binary -- labels of test data

    If `model` or `input_vector` is not one of the supported values, a hint is
    printed and the integer 0 is returned instead of the 5-tuple.
    """

    # Directory for every (model, input vector) combination, kept verbatim.
    # NOTE(review): the LR directories use "varied_temporal_resolutions" and the
    # SVM low-dim directory uses "low-dimensional_input" (underscores), unlike the
    # other entries -- confirm these match the on-disk layout.
    model_dirs = {
        "RF": {
            "high-dim": "../ML-models/random-forest/short-trajectories/varied-temporal-resolutions/high-dimensional-input/",
            "low-dim": "../ML-models/random-forest/short-trajectories/varied-temporal-resolutions/low-dimensional-input/",
        },
        "GB": {
            "high-dim": "../ML-models/gradient-boosting/short-trajectories/varied-temporal-resolutions/high-dimensional-input/",
            "low-dim": "../ML-models/gradient-boosting/short-trajectories/varied-temporal-resolutions/low-dimensional-input/",
        },
        "LR": {
            "high-dim": "../ML-models/logistic-regression/short-trajectories/varied_temporal_resolutions/high-dimensional-input/",
            "low-dim": "../ML-models/logistic-regression/short-trajectories/varied_temporal_resolutions/low-dimensional-input/",
        },
        "SVM": {
            "high-dim": "../ML-models/svm/short-trajectories/varied-temporal-resolutions/high-dimensional-input/",
            "low-dim": "../ML-models/svm/short-trajectories/varied-temporal-resolutions/low-dimensional_input/",
        },
    }
    # Suffix used in the file names for each input vector type.
    file_suffixes = {"high-dim": "high_dim", "low-dim": "low_dim"}

    # The model is validated first, then the input vector type. The isinstance
    # guards make non-string arguments fall through to the hint + 0 fallback.
    dirs_by_vector = model_dirs.get(model) if isinstance(model, str) else None
    if dirs_by_vector is None:
        print("Please select valid model")
        return 0

    path = dirs_by_vector.get(input_vector) if isinstance(input_vector, str) else None
    if path is None:
        print("Please select valid input vector type")
        return 0

    suffix = file_suffixes[input_vector]

    # Close the model file deterministically instead of leaking the handle.
    # NOTE: joblib.load unpickles the file, so only load model files you trust.
    with open(path + model + "_mixed_" + suffix + ".joblib", 'rb') as model_file:
        clf = joblib.load(model_file)

    X_train = np.load(path + "X_train_mixed_" + suffix + ".npy")
    X_test = np.load(path + "X_test_mixed_" + suffix + ".npy")
    y_train_binary = np.load(path + "y_train_binary_mixed_" + suffix + ".npy")
    y_test_binary = np.load(path + "y_test_binary_mixed_" + suffix + ".npy")

    return clf, X_train, X_test, y_train_binary, y_test_binary

In [88]:
# Reminder: trajectories with reduced length are always classified with the
# high-dimensional input vectors!
clf, X_train, X_test, y_train_binary, y_test_binary = load_classifier_and_data(
    model="SVM",
    input_vector="low-dim",
)

## Evaluate

### Overall prediction accuracies

In [89]:
# Mean accuracy of the loaded classifier on the data it was trained on.
print(
    "Accuracy on train data: ",
    clf.score(X_train, y_train_binary),
)

Accuracy on train data:  0.7954227722259618


In [90]:
# Mean accuracy of the loaded classifier on the held-out test data.
print(
    "Accuracy on test data: ",
    clf.score(X_test, y_test_binary),
)

Accuracy on test data:  0.7828034990398975


# Trajectories with reduced lengths

## Define evaluation routines

In [52]:
def load_classifier_and_reduced_traj_data(model, trajectory_length):
    """
    Loads the trained classifier and the training + test data for trajectories
    that were cut to a reduced length.
    (All classifications were performed using the high-dimensional preprocessing protocol.)

    Arguments:
    model -- which classifier to use ("RF", "GB", "LR", "SVM")
    trajectory_length -- length of the trajectories to classify

    Returns:
    clf -- trained classifier
    X_train -- data on which classifier was trained
    X_test -- unseen test data for evaluation
    y_train_binary -- labels of train data
    y_test_binary -- labels of test data

    If `model` is not one of the supported values, a hint is printed and the
    integer 0 is returned instead of the 5-tuple.
    """

    # Directory name of each supported classifier below "../../ML-models/".
    # NOTE(review): this loader resolves "../../ML-models/" while the
    # mixed-resolution loader in this notebook uses "../ML-models/" -- confirm
    # which one matches the notebook's location.
    model_dirs = {
        "RF": "random-forest",
        "GB": "gradient-boosting",
        "LR": "logistic-regression",
        "SVM": "svm",
    }

    # The isinstance guard makes non-string arguments fall through to the
    # hint + 0 fallback.
    model_dir = model_dirs.get(model) if isinstance(model, str) else None
    if model_dir is None:
        print("Please select valid model")
        return 0

    # e.g. "5s": shared by the directory name and every file name.
    length_tag = str(trajectory_length) + "s"
    base_path = "../../ML-models/" + model_dir + "/short-trajectories/reduced_length/" + length_tag + "/"

    # Close the model file deterministically instead of leaking the handle.
    # NOTE: pickle.load can execute arbitrary code, so only load model files you trust.
    with open(base_path + model + "_cutted_" + length_tag + "_3kRuns.sav", 'rb') as model_file:
        clf = pickle.load(model_file)

    X_train = np.load(base_path + "X_train_cutted_" + length_tag + ".npy")
    X_test = np.load(base_path + "X_test_cutted_" + length_tag + ".npy")
    y_train_binary = np.load(base_path + "y_train_binary_cutted_" + length_tag + ".npy")
    y_test_binary = np.load(base_path + "y_test_binary_cutted_" + length_tag + ".npy")

    return clf, X_train, X_test, y_train_binary, y_test_binary

In [40]:
def evaluate_clf_on_lengths(model, lengths):
    """
    Evaluates the classifiers that were trained on reduced-length trajectories.

    For every entry of `lengths` the matching classifier and data are loaded via
    load_classifier_and_reduced_traj_data, the array shapes and the accuracies are
    printed, and the accuracies are collected.

    Arguments:
    model -- which classifier to use ("RF", "GB", "LR", "SVM")
    lengths -- iterable of trajectory lengths to evaluate

    Returns:
    train_accuracies -- list of clf.score results on the train data, one per length
    test_accuracies -- list of clf.score results on the test data, one per length
    """
    train_accuracies = []
    test_accuracies = []
    for length in lengths:
        clf, X_train, X_test, y_train_binary, y_test_binary = load_classifier_and_reduced_traj_data(
            model=model, trajectory_length=length
        )

        print("np.shape(X_train): ", np.shape(X_train))
        print("np.shape(y_train_binary): ", np.shape(y_train_binary))
        # Score once and reuse the value for both printing and collecting;
        # scoring twice doubles the evaluation time and the log output.
        train_accuracy = clf.score(X_train, y_train_binary)
        print("Accuracy on train data: ", train_accuracy)
        train_accuracies.append(train_accuracy)

        print("np.shape(X_test): ", np.shape(X_test))
        print("np.shape(y_test_binary): ", np.shape(y_test_binary))
        test_accuracy = clf.score(X_test, y_test_binary)
        print("Accuracy on test data: ", test_accuracy)
        test_accuracies.append(test_accuracy)

    return train_accuracies, test_accuracies

## Evaluate models

In [42]:
# Available trajectory lengths: 5-10 in steps of 1, 20-100 in steps of 10,
# followed by 500, 1000, 1500 and 2000.
lengths = [*range(5, 11), *range(20, 101, 10), 500, 1000, 1500, 2000]

### Random forest

In [41]:
# Train/test accuracies of the random forest for every available trajectory length.
RF_train_accuracies, RF_test_accuracies = evaluate_clf_on_lengths(
    model="RF",
    lengths=lengths,
)

np.shape(X_train):  (19082, 5)
np.shape(y_train_binary):  (19082,)


[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.6s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.1s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.1s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s


Accuracy on train data:  0.7551095273032177


[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.6s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.2s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.2s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s


np.shape(X_test):  (4918, 5)
np.shape(y_test_binary):  (4918,)


[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.6s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.6s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s


Accuracy on test data:  0.5931272875152501


[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.2s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.4s finished


np.shape(X_train):  (19147, 6)
np.shape(y_train_binary):  (19147,)


[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.7s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.2s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.2s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s


Accuracy on train data:  0.7961038282759701


[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.6s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.1s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.1s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.2s


np.shape(X_test):  (4853, 6)
np.shape(y_test_binary):  (4853,)


[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.4s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s


Accuracy on test data:  0.6536163198021843


[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.2s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.4s finished


np.shape(X_train):  (19356, 7)
np.shape(y_train_binary):  (19356,)


[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.7s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.2s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.2s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s


Accuracy on train data:  0.8206757594544327


[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.7s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.2s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.2s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s


np.shape(X_test):  (4644, 7)
np.shape(y_test_binary):  (4644,)


[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.2s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.4s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s


Accuracy on test data:  0.6675279931093885


[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.2s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.4s finished


np.shape(X_train):  (19102, 8)
np.shape(y_train_binary):  (19102,)


[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.6s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.2s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.2s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s


Accuracy on train data:  0.8315883153596482


[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.7s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.1s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.1s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.2s


np.shape(X_test):  (4898, 8)
np.shape(y_test_binary):  (4898,)


[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.3s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.2s


Accuracy on test data:  0.6923233973050225


[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.5s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.5s finished


np.shape(X_train):  (19187, 9)
np.shape(y_train_binary):  (19187,)


[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.6s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.1s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.1s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s


Accuracy on train data:  0.8491165893573774


[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.6s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.1s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.1s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.2s


np.shape(X_test):  (4813, 9)
np.shape(y_test_binary):  (4813,)


[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.4s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.2s


Accuracy on test data:  0.7143153958030335


[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.3s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s


np.shape(X_train):  (19197, 10)
np.shape(y_train_binary):  (19197,)


[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.6s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.1s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.1s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s


Accuracy on train data:  0.8523206751054853


[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.6s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.2s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.2s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s


np.shape(X_test):  (4803, 10)
np.shape(y_test_binary):  (4803,)


[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.2s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.4s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.


Accuracy on test data:  0.7310014574224443


[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.2s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.4s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s


np.shape(X_train):  (19173, 20)
np.shape(y_train_binary):  (19173,)


[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.7s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.2s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.2s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s


Accuracy on train data:  0.8828560997235696


[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.7s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.2s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.2s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s


np.shape(X_test):  (4827, 20)
np.shape(y_test_binary):  (4827,)


[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.2s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.4s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s


Accuracy on test data:  0.7634141288585042


[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.2s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.4s finished


np.shape(X_train):  (19224, 30)
np.shape(y_train_binary):  (19224,)


[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.7s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.3s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.3s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s


Accuracy on train data:  0.893414481897628


[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.7s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.6s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.6s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s


np.shape(X_test):  (4776, 30)
np.shape(y_test_binary):  (4776,)


[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.4s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s


Accuracy on test data:  0.7784757118927973


[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.2s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.4s finished


np.shape(X_train):  (19296, 40)
np.shape(y_train_binary):  (19296,)


[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.7s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.3s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.3s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s


Accuracy on train data:  0.9049543946932007


[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.8s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.3s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.3s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s


np.shape(X_test):  (4704, 40)
np.shape(y_test_binary):  (4704,)


[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.2s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.4s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s


Accuracy on test data:  0.8031462585034014


[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.2s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.5s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.5s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.


np.shape(X_train):  (19234, 50)
np.shape(y_train_binary):  (19234,)


[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.8s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.4s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.4s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s


Accuracy on train data:  0.9138504731205157


[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.8s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.4s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.5s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s


np.shape(X_test):  (4766, 50)
np.shape(y_test_binary):  (4766,)


[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.4s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.


Accuracy on test data:  0.7943768359211079


[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.2s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.4s finished


np.shape(X_train):  (19178, 60)
np.shape(y_train_binary):  (19178,)


[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.8s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.3s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.4s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s


Accuracy on train data:  0.9233496714985922


[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.7s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.3s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.3s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s


np.shape(X_test):  (4822, 60)
np.shape(y_test_binary):  (4822,)


[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.2s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.4s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s


Accuracy on test data:  0.8187474077146413


[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.4s finished


np.shape(X_train):  (19174, 70)
np.shape(y_train_binary):  (19174,)


[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.8s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.4s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.4s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s


Accuracy on train data:  0.9296443100031292


[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.8s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.4s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.4s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s


np.shape(X_test):  (4826, 70)
np.shape(y_test_binary):  (4826,)


[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.2s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.4s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s


Accuracy on test data:  0.8350600911728139


[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.2s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.4s finished


np.shape(X_train):  (19275, 80)
np.shape(y_train_binary):  (19275,)


[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.8s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.5s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.5s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s


Accuracy on train data:  0.9369130998702984


[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.8s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.4s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.4s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s


np.shape(X_test):  (4725, 80)
np.shape(y_test_binary):  (4725,)


[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.4s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.


Accuracy on test data:  0.8391534391534392


[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.5s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.5s finished


np.shape(X_train):  (19235, 90)
np.shape(y_train_binary):  (19235,)


[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.8s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.4s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.4s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s


Accuracy on train data:  0.9390174161684429


[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.8s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.4s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.5s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s


np.shape(X_test):  (4765, 90)
np.shape(y_test_binary):  (4765,)


[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.2s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.5s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.5s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s


Accuracy on test data:  0.8459601259181532


[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.2s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.4s finished


np.shape(X_train):  (19147, 100)
np.shape(y_train_binary):  (19147,)


[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.9s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.5s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.5s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s


Accuracy on train data:  0.9406173290854964


[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.9s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.5s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.5s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s


np.shape(X_test):  (4853, 100)
np.shape(y_test_binary):  (4853,)


[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.5s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.5s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.


Accuracy on test data:  0.8504018133113538


[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.5s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.5s finished


np.shape(X_train):  (19265, 500)
np.shape(y_train_binary):  (19265,)


[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    1.0s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.7s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.7s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s


Accuracy on train data:  0.9691149753438879


[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    1.1s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.9s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.9s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s


np.shape(X_test):  (4735, 500)
np.shape(y_test_binary):  (4735,)


[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.5s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.5s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s


Accuracy on test data:  0.8817317845828934


[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.5s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.5s finished


np.shape(X_train):  (19210, 1000)
np.shape(y_train_binary):  (19210,)


[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.9s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.7s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.7s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.


Accuracy on train data:  0.972306090577824


[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.2s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.6s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    1.1s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.8s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.8s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s


np.shape(X_test):  (4790, 1000)
np.shape(y_test_binary):  (4790,)


[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.5s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.5s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s


Accuracy on test data:  0.9073068893528183


[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.5s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.5s finished


np.shape(X_train):  (19075, 1500)
np.shape(y_train_binary):  (19075,)


[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.9s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.6s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.6s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s


Accuracy on train data:  0.973735255570118


[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.9s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.6s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.6s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s


np.shape(X_test):  (4925, 1500)
np.shape(y_test_binary):  (4925,)


[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.5s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.5s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s


Accuracy on test data:  0.8950253807106598


[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.5s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.5s finished


np.shape(X_train):  (19248, 2000)
np.shape(y_train_binary):  (19248,)


[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.9s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.6s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.6s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s


Accuracy on train data:  0.9727763923524522


[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.9s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    1.5s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    1.5s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s


np.shape(X_test):  (4752, 2000)
np.shape(y_test_binary):  (4752,)


[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.5s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.5s finished


Accuracy on test data:  0.9082491582491582


[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:    0.5s
[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:    0.5s finished


### Gradient boosting

In [49]:
# Evaluate the gradient-boosting ("GB") classifier via evaluate_clf_on_lengths
# and unpack its two results into train and test accuracies.
# `lengths` is defined in an earlier cell (presumably the input lengths to
# evaluate -- confirm against its definition).
# NOTE(review): the recorded output shows train accuracy ~1.0 against test
# accuracy of roughly 0.64-0.76 for the shortest inputs, i.e. a large
# train/test gap -- worth a remark in the narrative.
GB_train_accuracies, GB_test_accuracies = evaluate_clf_on_lengths(
    lengths=lengths,
    model="GB",
)

np.shape(X_train):  (19178, 5)
np.shape(y_train_binary):  (19178,)
Accuracy on train data:  0.9999478569193868
np.shape(X_test):  (4822, 5)
np.shape(y_test_binary):  (4822,)
Accuracy on test data:  0.640812940688511
np.shape(X_train):  (19228, 6)
np.shape(y_train_binary):  (19228,)
Accuracy on train data:  0.9999479925109216
np.shape(X_test):  (4772, 6)
np.shape(y_test_binary):  (4772,)
Accuracy on test data:  0.7112321877619446
np.shape(X_train):  (19186, 7)
np.shape(y_train_binary):  (19186,)
Accuracy on train data:  1.0
np.shape(X_test):  (4814, 7)
np.shape(y_test_binary):  (4814,)
Accuracy on test data:  0.7451184046530951
np.shape(X_train):  (19153, 8)
np.shape(y_train_binary):  (19153,)
Accuracy on train data:  0.9998955777162847
np.shape(X_test):  (4847, 8)
np.shape(y_test_binary):  (4847,)
Accuracy on test data:  0.7648029709098412
np.shape(X_train):  (19208, 9)
np.shape(y_train_binary):  (19208,)
Accuracy on train data:  1.0
np.shape(X_test):  (4792, 9)
np.shape(y_test_binary)

### Logistic regression

In [51]:
# Evaluate the logistic-regression ("LR") classifier via evaluate_clf_on_lengths
# and unpack its two results into train and test accuracies.
# `lengths` is defined in an earlier cell (presumably the input lengths to
# evaluate -- confirm against its definition).
# NOTE(review): the recorded output shows accuracies of about 0.19-0.24 on both
# train and test data. If the labels are binary, as the y_*_binary names
# suggest, that is below chance level -- check label encoding and that the
# same preprocessing is applied as at training time before using these numbers.
LR_train_accuracies, LR_test_accuracies = evaluate_clf_on_lengths(
    lengths=lengths,
    model="LR",
)

np.shape(X_train):  (19177, 5)
np.shape(y_train_binary):  (19177,)
Accuracy on train data:  0.18934139855034676
np.shape(X_test):  (4823, 5)
np.shape(y_test_binary):  (4823,)
Accuracy on test data:  0.18453244868339208
np.shape(X_train):  (19184, 6)
np.shape(y_train_binary):  (19184,)
Accuracy on train data:  0.19234778982485404
np.shape(X_test):  (4816, 6)
np.shape(y_test_binary):  (4816,)
Accuracy on test data:  0.1877076411960133
np.shape(X_train):  (19135, 7)
np.shape(y_train_binary):  (19135,)
Accuracy on train data:  0.20329239613274105
np.shape(X_test):  (4865, 7)
np.shape(y_test_binary):  (4865,)
Accuracy on test data:  0.20513874614594038
np.shape(X_train):  (19112, 8)
np.shape(y_train_binary):  (19112,)
Accuracy on train data:  0.2023336123901214
np.shape(X_test):  (4888, 8)
np.shape(y_test_binary):  (4888,)
Accuracy on test data:  0.20130932896890344
np.shape(X_train):  (19226, 9)
np.shape(y_train_binary):  (19226,)
Accuracy on train data:  0.24009154270259025
np.shape(X_tes

### SVM

In [53]:
# Evaluate the support-vector-machine ("SVM") classifier via
# evaluate_clf_on_lengths and unpack its two results into train and test
# accuracies.
# `lengths` is defined in an earlier cell (presumably the input lengths to
# evaluate -- confirm against its definition).
# NOTE(review): the recorded output shows accuracies of about 0.19-0.25 on both
# train and test data. If the labels are binary, as the y_*_binary names
# suggest, that is below chance level -- check label encoding and that the
# same preprocessing is applied as at training time before using these numbers.
SVM_train_accuracies, SVM_test_accuracies = evaluate_clf_on_lengths(
    lengths=lengths,
    model="SVM",
)

np.shape(X_train):  (19175, 5)
np.shape(y_train_binary):  (19175,)
Accuracy on train data:  0.19973924380704042
np.shape(X_test):  (4825, 5)
np.shape(y_test_binary):  (4825,)
Accuracy on test data:  0.19461139896373056
np.shape(X_train):  (19321, 6)
np.shape(y_train_binary):  (19321,)
Accuracy on train data:  0.18968997463899384
np.shape(X_test):  (4679, 6)
np.shape(y_test_binary):  (4679,)
Accuracy on test data:  0.19298995511861508
np.shape(X_train):  (19327, 7)
np.shape(y_train_binary):  (19327,)
Accuracy on train data:  0.20634345733947326
np.shape(X_test):  (4673, 7)
np.shape(y_test_binary):  (4673,)
Accuracy on test data:  0.20586347100363792
np.shape(X_train):  (19167, 8)
np.shape(y_train_binary):  (19167,)
Accuracy on train data:  0.20378776021286588
np.shape(X_test):  (4833, 8)
np.shape(y_test_binary):  (4833,)
Accuracy on test data:  0.2042209807572936
np.shape(X_train):  (19170, 9)
np.shape(y_train_binary):  (19170,)
Accuracy on train data:  0.25075639019300994
np.shape(X_te