In [8]:
import numpy as np
from joblib import load
import time
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import OneClassSVM
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import accuracy_score
from sklearn.utils import shuffle


# Load in train/test data (same known data always, then mirror only or all unknown sets)
# Given which subflow packet size dataset to load
#
# Returns: known train, all unknown train, mirror train, known test, all unknown test, mirror test
def load_train_test(N):
    # Train sets
    known_train = np.load(f"../Feature-Vectors/train-test-flows/Basic-Features/{N}-p-subflows/Known-TRAIN.npy")
    mirror_train = np.load(f"../Feature-Vectors/train-test-flows/Basic-Features/{N}-p-subflows/Mirror-TRAIN.npy")
    unknown_train = np.load(f"../Feature-Vectors/train-test-flows/Basic-Features/{N}-p-subflows/Unknown-TRAIN.npy")
    # Test sets
    known_test = np.load(f"../Feature-Vectors/train-test-flows/Basic-Features/{N}-p-subflows/Known-TEST.npy")
    mirror_test = np.load(f"../Feature-Vectors/train-test-flows/Basic-Features/{N}-p-subflows/Mirror-TEST.npy")
    unknown_test = np.load(f"../Feature-Vectors/train-test-flows/Basic-Features/{N}-p-subflows/Unknown-TEST.npy")
    return known_train, unknown_train, mirror_train, known_test, unknown_test, mirror_test


# Combines individual train/test sets into 2 train/test sets and their corresponding labels: 
# Known + All Unknown and Known + Mirror Unknown
#
# Returns all unknown train and labels, all unknown test and labels, 
# mirror unknown train and labels, mirror unknown test and labels
def combine_train_test(known_train, unknown_train, mirror_train, known_test, unknown_test, mirror_test):
    all_unknown_train = np.concatenate((known_train, unknown_train), axis=0)
    all_unknown_train_l = np.concatenate((np.ones(known_train.shape[0]), np.zeros(unknown_train.shape[0])), axis=0)
    all_unknown_test = np.concatenate((known_test, unknown_test), axis=0)
    all_unknown_test_l = np.concatenate((np.ones(known_test.shape[0]), np.zeros(unknown_test.shape[0])), axis=0)
    mirror_unknown_train = np.concatenate((known_train, mirror_train), axis=0)
    mirror_unknown_train_l = np.concatenate((np.ones(known_train.shape[0]), np.zeros(mirror_train.shape[0])), axis=0)
    mirror_unknown_test = np.concatenate((known_test, mirror_test), axis=0)
    mirror_unknown_test_l = np.concatenate((np.ones(known_test.shape[0]), np.zeros(mirror_test.shape[0])), axis=0)
    return all_unknown_train, all_unknown_train_l, all_unknown_test, all_unknown_test_l, \
            mirror_unknown_train, mirror_unknown_train_l, mirror_unknown_test, mirror_unknown_test_l


# Converts -1, 1 labels to 0, 1 for SVM
def convert_SVM_labels(test):
    unknown_inds = np.where(test == -1)
    test[unknown_inds] = 0
    return test


# Get subflow classifications for all flows in given flow dictionary, 
# Given a regression model (bins, label likelihoods, KNN)
def maj_vote_flows(flow_dict, model, label):
    flows = list(flow_dict.keys())
    uncertain = 0
    correct = 0
    for flow in flow_dict:
        prediction = 0
        subflows = flow_dict[flow]
        # Classify all subflow feature vectors of a flow
        predictions = model.predict(subflows)
        # Classify flow by majority vote (win by 2)
        label_counts = np.unique(predictions, return_counts=True)[1]
        if len(label_counts) == 2:
            if label_counts[0] >= label_counts[1] + 2:
                prediction = 0
            elif label_counts[1] >= label_counts[0] + 2:
                prediction = 1
            else:
                # uncertain ...
                uncertain += 1
                continue
        else:
            # if only one label for all subflows, label flow as that label
            prediction = predictions[0]
            if prediction == -1:
                prediction = 0
        if label == prediction:
            correct += 1
    acc = correct/len(flow_dict)
    return flows, acc, uncertain


# Method to classify data as known or unknown based on neighbors' labelS
# majority of neighbors' label is classification for subflow
# Uses k in constructor of passed in knn
# Returns accuracy, given test data & model
def knn_classify(knn, train_labels, test_data, test_labels):
    neihbor_inds = knn.kneighbors(test_data)[1]
    correct = 0    
    for i in range(neihbor_inds.shape[0]):
        neighbors = neihbor_inds[i]
        test_label = test_labels[i]
        known_count = 0
        unknown_count = 0
        for n_ind in neighbors:
            if train_labels[n_ind] == 1:
                known_count += 1
            else: 
                unknown_count += 1
        majority = 1 if known_count > unknown_count else 0
        if majority == test_label:
            correct += 1
    acc = correct / test_data.shape[0]
    return acc

### 25 Packet Subflows - All Classifiers

In [19]:
# Training data creation & classifier initialization
N = 25
known_train, unknown_train, mirror_train, known_test, unknown_test, mirror_test = load_train_test(N)
all_unknown_train, all_unknown_train_l, all_unknown_test, all_unknown_test_l, \
            mirror_unknown_train, mirror_unknown_train_l, mirror_unknown_test, mirror_unknown_test_l =\
combine_train_test(known_train, unknown_train, mirror_train, known_test, unknown_test, mirror_test)
# Randomly select 50,000 known and unknown for SVM train set
known_train_svm = np.copy(known_train); np.random.shuffle(known_train_svm); known_train_svm = known_train_svm[:50000]
unknown_train_svm = np.copy(unknown_train); np.random.shuffle(unknown_train_svm); unknown_train_svm = unknown_train_svm[:50000]
all_unknown_train_svm = np.concatenate((known_train_svm, unknown_train_svm), axis=0)
mirror_train_svm = np.copy(mirror_train); np.random.shuffle(mirror_train_svm); mirror_train_svm = mirror_train_svm[:50000]
mirror_unknown_train_svm = np.concatenate((known_train_svm, mirror_train_svm), axis=0)

knn = NearestNeighbors(n_neighbors=3, n_jobs=-1)
nb = GaussianNB()
tree = DecisionTreeClassifier(max_depth=20, min_samples_split=5)
# Note: SVM is unsupervised, fit takes no labels and detects boundary for in/outliter detection
# MUST USE -1 AND 1 AS SVM LABELS
svm = OneClassSVM()


################################# KNN Avg. 3 NN train/classification

# ALL UNKNOWN DATASET
knn_all_unknown_train, knn_all_unknown_train_l = shuffle(all_unknown_train, all_unknown_train_l)
knn_all_unknown_train = knn_all_unknown_train[:10000]; knn_all_unknown_train_l = knn_all_unknown_train_l[:10000] 
knn.fit(knn_all_unknown_train)
all_u_acc = knn_classify(knn, knn_all_unknown_train_l, all_unknown_test, all_unknown_test_l)
print(f"KNN Classification {N}-P-Subflows ALL UNKNOWN Accuracy: {all_u_acc}")

# MIRROR UNKNOWN DATASET
knn_mirror_unknown_train, knn_mirror_unknown_train_l = shuffle(mirror_unknown_train, mirror_unknown_train_l)
knn_mirror_unknown_train = knn_mirror_unknown_train[:10000]; knn_mirror_unknown_train_l = knn_mirror_unknown_train_l[:10000]
knn.fit(knn_mirror_unknown_train)
mirror_u_acc = knn_classify(knn, knn_mirror_unknown_train_l, mirror_unknown_test, mirror_unknown_test_l)
print(f"KNN Classification {N}-P-Subflows MIRROR UNKNOWN Accuracy: {mirror_u_acc}\n")


################################# Naive Bayes train/classification 

# ALL UNKNOWN DATASET
nb_unknown_predicts = nb.fit(all_unknown_train, all_unknown_train_l).predict(all_unknown_test)
all_u_acc = accuracy_score(all_unknown_test_l, nb_unknown_predicts)
print(f"Naive Bayes {N}-P-Subflows ALL UNKNOWN Accuracy: {all_u_acc}")

# MIRROR UNKNOWN DATASET
nb_mirror_predicts = nb.fit(mirror_unknown_train, mirror_unknown_train_l).predict(mirror_unknown_test)
mirror_u_acc = accuracy_score(mirror_unknown_test_l, nb_mirror_predicts)
print(f"Naive Bayes {N}-P-Subflows MIRROR UNKNOWN Accuracy: {mirror_u_acc}\n")


################################# Decision Tree train/classification 

# ALL UNKNOWN DATASET
t_unknown_predicts = tree.fit(all_unknown_train, all_unknown_train_l).predict(all_unknown_test)
all_u_acc = accuracy_score(all_unknown_test_l, t_unknown_predicts)
print(f"Decision Tree {N}-P-Subflows ALL UNKNOWN Accuracy: {all_u_acc}")

# MIRROR UNKNOWN DATASET
t_mirror_predicts = tree.fit(mirror_unknown_train, mirror_unknown_train_l).predict(mirror_unknown_test)
mirror_u_acc = accuracy_score(mirror_unknown_test_l, t_mirror_predicts)
print(f"Decision Tree {N}-P-Subflows MIRROR UNKNOWN Accuracy: {mirror_u_acc}\n")


######################################## GBDT train/classification 

gbdt = GradientBoostingClassifier(random_state=0)
# ALL UNKNOWN DATASET
t_unknown_predicts = gbdt.fit(all_unknown_train, all_unknown_train_l).predict(all_unknown_test)
all_u_acc = accuracy_score(all_unknown_test_l, t_unknown_predicts)
print(f"GBDT {N}-P-Subflows ALL UNKNOWN Accuracy: {all_u_acc}")

# MIRROR UNKNOWN DATASET
t_mirror_predicts = gbdt.fit(mirror_unknown_train, mirror_unknown_train_l).predict(mirror_unknown_test)
mirror_u_acc = accuracy_score(mirror_unknown_test_l, t_mirror_predicts)
print(f"GBDT {N}-P-Subflows MIRROR UNKNOWN Accuracy: {mirror_u_acc}\n")


################################## SVM train/classification

# ALL UNKNOWN DATASET
# Timing SVM training - 30 min to train on 100k training set
start = time.time()
svm.fit(all_unknown_train_svm)
end = time.time()
print(f"Time taken to train SVM on all unknown data: {end - start}")
svm_unknown_predicts = svm.predict(all_unknown_test)
# Converting 0, 1 labels to -1, 1 in test set 
all_unknown_labels = convert_SVM_labels(all_unknown_test_l)
all_u_acc = accuracy_score(all_unknown_labels, svm_unknown_predicts)
print(f"SVM {N}-P-Subflows ALL UNKNOWN Accuracy: {all_u_acc}")

# MIRROR UNKNOWN DATASET
start = time.time()
svm.fit(mirror_unknown_train_svm)
end = time.time()
print(f"Time taken to train SVM on mirror unknown data: {end - start}")
svm_mirror_predicts = svm.predict(mirror_unknown_test)
# Converting 0, 1 labels to -1, 1 in test set 
mirror_unknown_labels = convert_SVM_labels(mirror_unknown_test_l)
mirror_u_acc = accuracy_score(mirror_unknown_labels, svm_mirror_predicts)
print(f"SVM {N}-P-Subflows MIRROR UNKNOWN Accuracy: {mirror_u_acc}")

KNN Classification 25-P-Subflows ALL UNKNOWN Accuracy: 0.9832348064662574
KNN Classification 25-P-Subflows MIRROR UNKNOWN Accuracy: 1.0

Naive Bayes 25-P-Subflows ALL UNKNOWN Accuracy: 0.8341257426524288
Naive Bayes 25-P-Subflows MIRROR UNKNOWN Accuracy: 0.9841403983689794

Decision Tree 25-P-Subflows ALL UNKNOWN Accuracy: 0.999416487377376
Decision Tree 25-P-Subflows MIRROR UNKNOWN Accuracy: 1.0

GBDT 25-P-Subflows ALL UNKNOWN Accuracy: 0.9969208297968932
GBDT 25-P-Subflows MIRROR UNKNOWN Accuracy: 1.0

Time taken to train SVM on all unknown data: 1270.4506559371948
SVM 25-P-Subflows ALL UNKNOWN Accuracy: 0.310374434992006
Time taken to train SVM on mirror unknown data: 767.7876553535461
SVM 25-P-Subflows MIRROR UNKNOWN Accuracy: 0.127145475754041


### 100 Packet Subflows

In [18]:
# Training data creation & classifier initialization
N = 100
known_train, unknown_train, mirror_train, known_test, unknown_test, mirror_test = load_train_test(N)
all_unknown_train, all_unknown_train_l, all_unknown_test, all_unknown_test_l, \
            mirror_unknown_train, mirror_unknown_train_l, mirror_unknown_test, mirror_unknown_test_l =\
combine_train_test(known_train, unknown_train, mirror_train, known_test, unknown_test, mirror_test)
# Randomly select 50,000 known and unknown for SVM train set
known_train_svm = np.copy(known_train); np.random.shuffle(known_train_svm); known_train_svm = known_train_svm[:50000]
unknown_train_svm = np.copy(unknown_train); np.random.shuffle(unknown_train_svm); unknown_train_svm = unknown_train_svm[:50000]
all_unknown_train_svm = np.concatenate((known_train_svm, unknown_train_svm), axis=0)
mirror_train_svm = np.copy(mirror_train); np.random.shuffle(mirror_train_svm); mirror_train_svm = mirror_train_svm[:50000]
mirror_unknown_train_svm = np.concatenate((known_train_svm, mirror_train_svm), axis=0)

nb = GaussianNB()
tree = DecisionTreeClassifier(max_depth=20, min_samples_split=5)
# Note: SVM is unsupervised, fit takes no labels and detects boundary for in/outliter detection
# MUST USE -1 AND 1 AS SVM LABELS
svm = OneClassSVM()

################################# KNN Avg. 3 NN train/classification

# ALL UNKNOWN DATASET
knn_all_unknown_train, knn_all_unknown_train_l = shuffle(all_unknown_train, all_unknown_train_l)
knn_all_unknown_train = knn_all_unknown_train[:10000]; knn_all_unknown_train_l = knn_all_unknown_train_l[:10000] 
knn.fit(knn_all_unknown_train)
all_u_acc = knn_classify(knn, knn_all_unknown_train_l, all_unknown_test, all_unknown_test_l)
print(f"KNN Classification {N}-P-Subflows ALL UNKNOWN Accuracy: {all_u_acc}")

# MIRROR UNKNOWN DATASET
knn_mirror_unknown_train, knn_mirror_unknown_train_l = shuffle(mirror_unknown_train, mirror_unknown_train_l)
knn_mirror_unknown_train = knn_mirror_unknown_train[:10000]; knn_mirror_unknown_train_l = knn_mirror_unknown_train_l[:10000]
knn.fit(knn_mirror_unknown_train)
mirror_u_acc = knn_classify(knn, knn_mirror_unknown_train_l, mirror_unknown_test, mirror_unknown_test_l)
print(f"KNN Classification {N}-P-Subflows MIRROR UNKNOWN Accuracy: {mirror_u_acc}\n")


################################# Naive Bayes train/classification 

# ALL UNKNOWN DATASET
nb_unknown_predicts = nb.fit(all_unknown_train, all_unknown_train_l).predict(all_unknown_test)
all_u_acc = accuracy_score(all_unknown_test_l, nb_unknown_predicts)
print(f"Naive Bayes {N}-P-Subflows ALL UNKNOWN Accuracy: {all_u_acc}")

# MIRROR UNKNOWN DATASET
nb_mirror_predicts = nb.fit(mirror_unknown_train, mirror_unknown_train_l).predict(mirror_unknown_test)
mirror_u_acc = accuracy_score(mirror_unknown_test_l, nb_mirror_predicts)
print(f"Naive Bayes {N}-P-Subflows MIRROR UNKNOWN Accuracy: {mirror_u_acc}\n")


################################# Decision Tree train/classification 

# ALL UNKNOWN DATASET
t_unknown_predicts = tree.fit(all_unknown_train, all_unknown_train_l).predict(all_unknown_test)
all_u_acc = accuracy_score(all_unknown_test_l, t_unknown_predicts)
print(f"Decision Tree {N}-P-Subflows ALL UNKNOWN Accuracy: {all_u_acc}")

# MIRROR UNKNOWN DATASET
t_mirror_predicts = tree.fit(mirror_unknown_train, mirror_unknown_train_l).predict(mirror_unknown_test)
mirror_u_acc = accuracy_score(mirror_unknown_test_l, t_mirror_predicts)
print(f"Decision Tree {N}-P-Subflows MIRROR UNKNOWN Accuracy: {mirror_u_acc}\n")


########################################## GBDT train/classification 

gbdt = GradientBoostingClassifier(random_state=0)
# ALL UNKNOWN DATASET
t_unknown_predicts = gbdt.fit(all_unknown_train, all_unknown_train_l).predict(all_unknown_test)
all_u_acc = accuracy_score(all_unknown_test_l, t_unknown_predicts)
print(f"GBDT {N}-P-Subflows ALL UNKNOWN Accuracy: {all_u_acc}")

# MIRROR UNKNOWN DATASET
t_mirror_predicts = gbdt.fit(mirror_unknown_train, mirror_unknown_train_l).predict(mirror_unknown_test)
mirror_u_acc = accuracy_score(mirror_unknown_test_l, t_mirror_predicts)
print(f"GBDT {N}-P-Subflows MIRROR UNKNOWN Accuracy: {mirror_u_acc}\n")


# ################################## SVM train/classification

# ALL UNKNOWN DATASET
# Timing SVM training - 30 min to train on 100k training set
start = time.time()
svm.fit(all_unknown_train_svm)
end = time.time()
print(f"Time taken to train SVM on all unknown data: {end - start}")
svm_unknown_predicts = svm.predict(all_unknown_test)
# Converting 0, 1 labels to -1, 1 in test set 
all_unknown_labels = convert_SVM_labels(all_unknown_test_l)
all_u_acc = accuracy_score(all_unknown_labels, svm_unknown_predicts)
print(f"SVM {N}-P-Subflows ALL UNKNOWN Accuracy: {all_u_acc}")

# MIRROR UNKNOWN DATASET
start = time.time()
svm.fit(mirror_unknown_train_svm)
end = time.time()
print(f"Time taken to train SVM on mirror unknown data: {end - start}")
svm_mirror_predicts = svm.predict(mirror_unknown_test)
# Converting 0, 1 labels to -1, 1 in test set 
mirror_unknown_labels = convert_SVM_labels(mirror_unknown_test_l)
mirror_u_acc = accuracy_score(mirror_unknown_labels, svm_mirror_predicts)
print(f"SVM {N}-P-Subflows MIRROR UNKNOWN Accuracy: {mirror_u_acc}")

KNN Classification 100-P-Subflows ALL UNKNOWN Accuracy: 0.9871928592919467
KNN Classification 100-P-Subflows MIRROR UNKNOWN Accuracy: 1.0

Naive Bayes 100-P-Subflows ALL UNKNOWN Accuracy: 0.8496239093370775
Naive Bayes 100-P-Subflows MIRROR UNKNOWN Accuracy: 0.9762621174164035

Decision Tree 100-P-Subflows ALL UNKNOWN Accuracy: 0.9998094473974526
Decision Tree 100-P-Subflows MIRROR UNKNOWN Accuracy: 1.0

GBDT 100-P-Subflows ALL UNKNOWN Accuracy: 0.9981747066492829
GBDT 100-P-Subflows MIRROR UNKNOWN Accuracy: 1.0

Time taken to train SVM on all unknown data: 1289.8000972270966
SVM 100-P-Subflows ALL UNKNOWN Accuracy: 0.3213067896901013
Time taken to train SVM on mirror unknown data: 816.174535036087
SVM 100-P-Subflows MIRROR UNKNOWN Accuracy: 0.2329198889009912


### 1000 Packet Subflows

In [17]:
# Training data creation & classifier initialization
N = 1000
known_train, unknown_train, mirror_train, known_test, unknown_test, mirror_test = load_train_test(N)
all_unknown_train, all_unknown_train_l, all_unknown_test, all_unknown_test_l, \
            mirror_unknown_train, mirror_unknown_train_l, mirror_unknown_test, mirror_unknown_test_l =\
combine_train_test(known_train, unknown_train, mirror_train, known_test, unknown_test, mirror_test)
# Randomly select 50,000 known and unknown for SVM train set
known_train_svm = np.copy(known_train); np.random.shuffle(known_train_svm); known_train_svm = known_train_svm[:50000]
unknown_train_svm = np.copy(unknown_train); np.random.shuffle(unknown_train_svm); unknown_train_svm = unknown_train_svm[:50000]
all_unknown_train_svm = np.concatenate((known_train_svm, unknown_train_svm), axis=0)
mirror_train_svm = np.copy(mirror_train); np.random.shuffle(mirror_train_svm); mirror_train_svm = mirror_train_svm[:50000]
mirror_unknown_train_svm = np.concatenate((known_train_svm, mirror_train_svm), axis=0)

nb = GaussianNB()
tree = DecisionTreeClassifier(max_depth=20, min_samples_split=5)
# Note: SVM is unsupervised, fit takes no labels and detects boundary for in/outliter detection
# MUST USE -1 AND 1 AS SVM LABELS
svm = OneClassSVM()


################################# KNN Avg. 3 NN train/classification

# ALL UNKNOWN DATASET
knn_all_unknown_train, knn_all_unknown_train_l = shuffle(all_unknown_train, all_unknown_train_l)
knn_all_unknown_train = knn_all_unknown_train[:10000]; knn_all_unknown_train_l = knn_all_unknown_train_l[:10000] 
knn.fit(knn_all_unknown_train)
all_u_acc = knn_classify(knn, knn_all_unknown_train_l, all_unknown_test, all_unknown_test_l)
print(f"KNN Classification {N}-P-Subflows ALL UNKNOWN Accuracy: {all_u_acc}")

# MIRROR UNKNOWN DATASET
knn_mirror_unknown_train, knn_mirror_unknown_train_l = shuffle(mirror_unknown_train, mirror_unknown_train_l)
knn_mirror_unknown_train = knn_mirror_unknown_train[:10000]; knn_mirror_unknown_train_l = knn_mirror_unknown_train_l[:10000]
knn.fit(knn_mirror_unknown_train)
mirror_u_acc = knn_classify(knn, knn_mirror_unknown_train_l, mirror_unknown_test, mirror_unknown_test_l)
print(f"KNN Classification {N}-P-Subflows MIRROR UNKNOWN Accuracy: {mirror_u_acc}\n")


################################# Naive Bayes train/classification 

# ALL UNKNOWN DATASET
nb_unknown_predicts = nb.fit(all_unknown_train, all_unknown_train_l).predict(all_unknown_test)
all_u_acc = accuracy_score(all_unknown_test_l, nb_unknown_predicts)
print(f"Naive Bayes {N}-P-Subflows ALL UNKNOWN Accuracy: {all_u_acc}")

# MIRROR UNKNOWN DATASET
nb_mirror_predicts = nb.fit(mirror_unknown_train, mirror_unknown_train_l).predict(mirror_unknown_test)
mirror_u_acc = accuracy_score(mirror_unknown_test_l, nb_mirror_predicts)
print(f"Naive Bayes {N}-P-Subflows MIRROR UNKNOWN Accuracy: {mirror_u_acc}\n")


################################# Decision Tree train/classification 

# ALL UNKNOWN DATASET
t_unknown_predicts = tree.fit(all_unknown_train, all_unknown_train_l).predict(all_unknown_test)
all_u_acc = accuracy_score(all_unknown_test_l, t_unknown_predicts)
print(f"Decision Tree {N}-P-Subflows ALL UNKNOWN Accuracy: {all_u_acc}")

# MIRROR UNKNOWN DATASET
t_mirror_predicts = tree.fit(mirror_unknown_train, mirror_unknown_train_l).predict(mirror_unknown_test)
mirror_u_acc = accuracy_score(mirror_unknown_test_l, t_mirror_predicts)
print(f"Decision Tree {N}-P-Subflows MIRROR UNKNOWN Accuracy: {mirror_u_acc}\n")


########################################## GBDT train/classification 

gbdt = GradientBoostingClassifier(random_state=0)
# ALL UNKNOWN DATASET
t_unknown_predicts = gbdt.fit(all_unknown_train, all_unknown_train_l).predict(all_unknown_test)
all_u_acc = accuracy_score(all_unknown_test_l, t_unknown_predicts)
print(f"GBDT {N}-P-Subflows ALL UNKNOWN Accuracy: {all_u_acc}")

# MIRROR UNKNOWN DATASET
t_mirror_predicts = gbdt.fit(mirror_unknown_train, mirror_unknown_train_l).predict(mirror_unknown_test)
mirror_u_acc = accuracy_score(mirror_unknown_test_l, t_mirror_predicts)
print(f"GBDT {N}-P-Subflows MIRROR UNKNOWN Accuracy: {mirror_u_acc}\n")


# ################################## SVM train/classification

# ALL UNKNOWN DATASET
# Timing SVM training - 30 min to train on 100k training set
start = time.time()
svm.fit(all_unknown_train_svm)
end = time.time()
print(f"Time taken to train SVM on all unknown data: {end - start}")
svm_unknown_predicts = svm.predict(all_unknown_test)
# Converting 0, 1 labels to -1, 1 in test set 
all_unknown_labels = convert_SVM_labels(all_unknown_test_l)
all_u_acc = accuracy_score(all_unknown_labels, svm_unknown_predicts)
print(f"SVM {N}-P-Subflows ALL UNKNOWN Accuracy: {all_u_acc}")

# MIRROR UNKNOWN DATASET
start = time.time()
svm.fit(mirror_unknown_train_svm)
end = time.time()
print(f"Time taken to train SVM on mirror unknown data: {end - start}")
svm_mirror_predicts = svm.predict(mirror_unknown_test)
# Converting 0, 1 labels to -1, 1 in test set 
mirror_unknown_labels = convert_SVM_labels(mirror_unknown_test_l)
mirror_u_acc = accuracy_score(mirror_unknown_labels, svm_mirror_predicts)
print(f"SVM {N}-P-Subflows MIRROR UNKNOWN Accuracy: {mirror_u_acc}")

KNN Classification 1000-P-Subflows ALL UNKNOWN Accuracy: 0.9849663308451219
KNN Classification 1000-P-Subflows MIRROR UNKNOWN Accuracy: 1.0

Naive Bayes 1000-P-Subflows ALL UNKNOWN Accuracy: 0.7762697708409458
Naive Bayes 1000-P-Subflows MIRROR UNKNOWN Accuracy: 0.9914741474147415

Decision Tree 1000-P-Subflows ALL UNKNOWN Accuracy: 0.9962415827112805
Decision Tree 1000-P-Subflows MIRROR UNKNOWN Accuracy: 1.0

GBDT 1000-P-Subflows ALL UNKNOWN Accuracy: 0.9989037949574568
GBDT 1000-P-Subflows MIRROR UNKNOWN Accuracy: 1.0

Time taken to train SVM on all unknown data: 684.8947021961212
SVM 1000-P-Subflows ALL UNKNOWN Accuracy: 0.437333611734614
Time taken to train SVM on mirror unknown data: 378.36271810531616
SVM 1000-P-Subflows MIRROR UNKNOWN Accuracy: 0.5974972497249725
