# Import dependencies

In [1]:
import numpy as np
import pickle
import pandas as pd
import joblib

# Trajectories with mixed temporal resolutions

## Load classifier and data

In [87]:
def load_classifier_and_data(model, input_vector):
    """
    Loads a trained classifier together with its training and test data.

    All artefacts are read from a directory that is selected by the
    (model, input_vector) combination. Paths are relative, so the notebook
    must be run from its own directory.

    Arguments:
    model -- which classifier to use ("RF", "GB", "LR", "SVM")
    input_vector -- use high-dimensional ("high-dim") or low-dimensional ("low-dim") input vectors

    Returns:
    clf -- trained classifier (whatever object was stored in the .joblib file)
    X_train -- data on which the classifier was trained
    X_test -- unseen test data for evaluation
    y_train_binary -- labels of the train data
    y_test_binary -- labels of the test data

    If `model` or `input_vector` is not one of the supported values, a message
    is printed and the integer 0 is returned instead of the 5-tuple. Callers
    that unpack the result directly will therefore get a TypeError in that case.
    """

    # One directory per supported (model, input_vector) combination.
    # NOTE(review): the "LR" entries spell "varied_temporal_resolutions" with
    # underscores and the "SVM"/"low-dim" entry ends in "low-dimensional_input",
    # unlike every other entry. The strings are kept exactly as they were;
    # confirm they match the directory names on disk.
    data_dirs = {
        "RF": {
            "high-dim": "../ML-models/random-forest/short-trajectories/varied-temporal-resolutions/high-dimensional-input/",
            "low-dim": "../ML-models/random-forest/short-trajectories/varied-temporal-resolutions/low-dimensional-input/",
        },
        "GB": {
            "high-dim": "../ML-models/gradient-boosting/short-trajectories/varied-temporal-resolutions/high-dimensional-input/",
            "low-dim": "../ML-models/gradient-boosting/short-trajectories/varied-temporal-resolutions/low-dimensional-input/",
        },
        "LR": {
            "high-dim": "../ML-models/logistic-regression/short-trajectories/varied_temporal_resolutions/high-dimensional-input/",
            "low-dim": "../ML-models/logistic-regression/short-trajectories/varied_temporal_resolutions/low-dimensional-input/",
        },
        "SVM": {
            "high-dim": "../ML-models/svm/short-trajectories/varied-temporal-resolutions/high-dimensional-input/",
            "low-dim": "../ML-models/svm/short-trajectories/varied-temporal-resolutions/low-dimensional_input/",
        },
    }

    # File-name suffix that corresponds to each input vector type.
    file_suffixes = {"high-dim": "mixed_high_dim", "low-dim": "mixed_low_dim"}

    # Validate the model first, then the input vector type, so that the
    # printed message matches the first invalid argument.
    if model not in data_dirs:
        print("Please select valid model")
        return 0
    if input_vector not in data_dirs[model]:
        print("Please select valid input vector type")
        return 0

    path = data_dirs[model][input_vector]
    suffix = file_suffixes[input_vector]

    # Use a context manager so the file handle is closed as soon as the
    # classifier has been deserialised (previously it was left open).
    with open(path + model + "_" + suffix + ".joblib", 'rb') as model_file:
        clf = joblib.load(model_file)

    X_train = np.load(path + "X_train_" + suffix + ".npy")
    X_test = np.load(path + "X_test_" + suffix + ".npy")
    y_train_binary = np.load(path + "y_train_binary_" + suffix + ".npy")
    y_test_binary = np.load(path + "y_test_binary_" + suffix + ".npy")

    return clf, X_train, X_test, y_train_binary, y_test_binary

In [88]:
# Choose the classifier family and the input representation evaluated below.
# Note: trajectories with reduced length are always classified with the
# high-dimensional input vectors!
(
    clf,
    X_train,
    X_test,
    y_train_binary,
    y_test_binary,
) = load_classifier_and_data(model="SVM", input_vector="low-dim")

## Evaluate

### Overall prediction accuracies

In [89]:
# Score the loaded classifier on the training split and report the result.
train_accuracy = clf.score(X_train, y_train_binary)
print("Accuracy on train data: ", train_accuracy)

Accuracy on train data:  0.7954227722259618


In [90]:
# Score the loaded classifier on the held-out test split and report the result.
test_accuracy = clf.score(X_test, y_test_binary)
print("Accuracy on test data: ", test_accuracy)

Accuracy on test data:  0.7828034990398975


# Trajectories with reduced lengths

In [None]:
# NOTE(review): orphaned fragment. This cell starts with `else:` without a
# matching `if` and uses `return` outside of any function, so it cannot be
# executed on its own. It also reads `input_vector` and `data_type`, neither
# of which is defined in this cell. Presumably it was cut out of a loader
# function for the reduced-length trajectories -- TODO confirm and either
# restore the full function or delete the cell.
else:
            if input_vector == "low-dim":
                print("Trajectories with reduced length were always classified with high-dimensional input vectors.")
                return 0
            else:
                # NOTE(review): absolute, machine-specific path; this will not resolve
                # on another machine. The other loaders in this notebook use paths
                # relative to "../ML-models/".
                path = "/home/david/Studium/Doktorarbeit/Paper/Theoretical_MD_simulations/Github/ML_Classification_MD_Trajectories/ML-models/random-forest/short-trajectories/reduced-length/"
                # NOTE(review): pickle.load executes arbitrary code from the file; only
                # load model files from a trusted source. The file handle opened here
                # is never closed explicitly.
                clf = pickle.load(open(path + "RF_length_" + data_type + ".sav", 'rb'))
                    # The train/test arrays are not loaded here; the corresponding
                    # np.load calls below are commented out.
                    #X_train = np.load(path + "X_train_high_dim.npy")
                    #X_test = np.load(path + "X_test_high_dim.npy")
                    #y_train_binary = np.load(path + "y_train_binary_high_dim.npy")
                    #y_test_binary = np.load(path + "y_test_binary_high_dim.npy")