In [1]:
import numpy as np
from joblib import load, dump
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier


# Combines individual train/test sets into 2 train/test sets and their corresponding labels: 
# Known + All Unknown and Known + Mirror Unknown
#
# Returns all unknown train and labels, all unknown test and labels, 
# mirror unknown train and labels, mirror unknown test and labels
def combine_train_test(known_train, unknown_train, mirror_train, known_test, unknown_test, mirror_test):
    """Build the "known + all unknown" and "known + mirror unknown" data sets.

    Every combined array stacks the known samples first and the other samples
    after them along axis 0. The matching label vector holds 1.0 for each known
    row and 0.0 for each other row.

    Returns an 8-tuple, in this order:
        all-unknown train, its labels, all-unknown test, its labels,
        mirror-unknown train, its labels, mirror-unknown test, its labels.
    """
    def _stack_with_labels(known, other):
        # Known rows come first, so the labels are a run of ones followed by zeros.
        samples = np.concatenate((known, other), axis=0)
        labels = np.concatenate((np.ones(known.shape[0]), np.zeros(other.shape[0])), axis=0)
        return samples, labels

    au_train, au_train_l = _stack_with_labels(known_train, unknown_train)
    au_test, au_test_l = _stack_with_labels(known_test, unknown_test)
    mu_train, mu_train_l = _stack_with_labels(known_train, mirror_train)
    mu_test, mu_test_l = _stack_with_labels(known_test, mirror_test)
    return (au_train, au_train_l, au_test, au_test_l,
            mu_train, mu_train_l, mu_test, mu_test_l)


# Load the final-features train/test data for the given subflow packet size N
# (the same known data is always used, combined with either the mirror-only or the all-unknown sets).
# For N == 100 the known and unknown sets are read from the "Random" files.
#
# Returns: known train, all unknown train, mirror train, known test, all unknown test, mirror test
def load_final_train_test(N):
    """Load the final-feature train/test arrays for N-packet subflows.

    The files are read from ../Feature-Vectors/train-test-flows/{N}-p-subflows/.
    When N == 100 the known and unknown files carry an extra "Random-" tag in
    their names; the mirror files never do.

    Returns:
        known_train, unknown_train, mirror_train,
        known_test, unknown_test, mirror_test
    """
    base_dir = f"../Feature-Vectors/train-test-flows/{N}-p-subflows"
    # Only the known/unknown file names change for the 100-packet data set.
    tag = "Random-" if N == 100 else ""

    def _load(kind, split):
        return np.load(f"{base_dir}/Final-80-{kind}-{split}.npy")

    # Train sets
    known_train = _load(f"{tag}Known", "TRAIN")
    mirror_train = _load("Mirror", "TRAIN")
    unknown_train = _load(f"{tag}Unknown", "TRAIN")
    # Test sets
    known_test = _load(f"{tag}Known", "TEST")
    mirror_test = _load("Mirror", "TEST")
    unknown_test = _load(f"{tag}Unknown", "TEST")
    return known_train, unknown_train, mirror_train, known_test, unknown_test, mirror_test

# Same as above but naive features
def load_naive_train_test(N):
    """Load the naive (basic) feature train/test arrays for N-packet subflows.

    The files are read from
    ../Feature-Vectors/train-test-flows/Basic-Features/{N}-p-subflows/.

    Returns:
        known_train, unknown_train, mirror_train,
        known_test, unknown_test, mirror_test
    """
    folder = f"../Feature-Vectors/train-test-flows/Basic-Features/{N}-p-subflows"
    kinds = ("Known", "Mirror", "Unknown")
    # Train sets (read in the order known, mirror, unknown)
    known_train, mirror_train, unknown_train = (
        np.load(f"{folder}/{kind}-TRAIN.npy") for kind in kinds
    )
    # Test sets (same order)
    known_test, mirror_test, unknown_test = (
        np.load(f"{folder}/{kind}-TEST.npy") for kind in kinds
    )
    return known_train, unknown_train, mirror_train, known_test, unknown_test, mirror_test


# 2 bins: known and unknown labels
# Known Bin Known Likelihood: "empirical accuracy", i.e. the fraction of known samples that were labelled positive/known
# Known Bin Unknown Likelihood: the fraction of known samples that were labelled negative/unknown
# The same applies to the unknown samples for the Unknown Bin Likelihoods
#
# Returns known, unknown arrays of known, unknown likelihoods. Format: [unknown_likelihood, known_likelihood]
def label_bins(model, unknown_train, known_train):
    """Measure how `model` labels the known bin and the unknown bin.

    For each of the two sample sets, `model.predict` is called once and the
    share of predictions equal to 0 (unknown) and equal to 1 (known) is taken
    relative to the number of samples in that set.

    Returns:
        (known_bin_ls, unknown_bin_ls), each an array of the form
        [unknown_likelihood, known_likelihood].
    """
    def _bin_likelihoods(samples):
        predicted = model.predict(samples)
        total = len(samples)
        as_known = np.count_nonzero(predicted == 1)
        as_unknown = np.count_nonzero(predicted == 0)
        return np.array([as_unknown / total, as_known / total])

    # The known bin is evaluated first, then the unknown bin.
    known_bin_ls = _bin_likelihoods(known_train)
    unknown_bin_ls = _bin_likelihoods(unknown_train)
    return known_bin_ls, unknown_bin_ls

### Naive Features

In [5]:
############################# 25 PACKET SUBFLOWS - TREE
# Naive features, 25-packet subflows: fit one decision tree per unknown set
# (mirror only, then all unknown), save each model under Models/, and store the
# label likelihoods of the known/unknown bins next to it.
N = 25
(known_train, unknown_train, mirror_train,
 known_test, unknown_test, mirror_test) = load_naive_train_test(N)
(all_unknown_train, all_unknown_train_l,
 all_unknown_test, all_unknown_test_l,
 mirror_unknown_train, mirror_unknown_train_l,
 mirror_unknown_test, mirror_unknown_test_l) = combine_train_test(
    known_train, unknown_train, mirror_train,
    known_test, unknown_test, mirror_test,
)

# MIRROR UNKNOWN
name = f'{N}_M_Tree'
# Train the decision tree on the full known + mirror train set.
# random_state is fixed (as in the GBDT cells) so that re-running this cell
# reproduces the same saved model instead of depending on an unseeded RNG.
tree = DecisionTreeClassifier(max_depth=20, min_samples_split=5, random_state=0)\
    .fit(mirror_unknown_train, mirror_unknown_train_l)
# Saving model
dump(tree, f'Models/{name}')
# Get known and unknown bins and likelihoods.
# NOTE(review): these are measured on the same train arrays the tree was fit on;
# the *_test arrays loaded above are not used in this cell.
mirror_known_bins, mirror_unknown_bins = label_bins(tree, mirror_train, known_train)
print(f"Mirror Known Likelihoods: {mirror_known_bins}")
print(f"Mirror Unknown Likelihoods: {mirror_unknown_bins}")
np.save(f"Models/{name}_Known_Bin_Ls", mirror_known_bins)
np.save(f"Models/{name}_Unknown_Bin_Ls", mirror_unknown_bins)


# ALL UNKNOWN
name = f'{N}_U_Tree'
# Same tree settings and seed, fit on the full known + all-unknown train set.
tree = DecisionTreeClassifier(max_depth=20, min_samples_split=5, random_state=0)\
    .fit(all_unknown_train, all_unknown_train_l)
# Saving model
dump(tree, f'Models/{name}')
# Get known and unknown bins and likelihoods
u_known_bins, u_unknown_bins = label_bins(tree, unknown_train, known_train)
print(f"All Unknown Known Likelihoods: {u_known_bins}")
print(f"All Unknown Unknown Likelihoods: {u_unknown_bins}")
np.save(f"Models/{name}_Known_Bin_Ls", u_known_bins)
np.save(f"Models/{name}_Unknown_Bin_Ls", u_unknown_bins)

Mirror Known Likelihoods: [0. 1.]
Mirror Unknown Likelihoods: [1. 0.]
All Unknown Known Likelihoods: [3.66822594e-05 9.99963318e-01]
All Unknown Unknown Likelihoods: [9.99841801e-01 1.58199017e-04]


In [39]:
############################# 25 PACKET SUBFLOWS - GBDT TRAIN
# Naive features, 25-packet subflows: fit one GBDT per unknown set, save it,
# and record how the fitted model labels the known and unknown train bins.
N = 25
(known_train, unknown_train, mirror_train,
 known_test, unknown_test, mirror_test) = load_naive_train_test(N)
(all_unknown_train, all_unknown_train_l,
 all_unknown_test, all_unknown_test_l,
 mirror_unknown_train, mirror_unknown_train_l,
 mirror_unknown_test, mirror_unknown_test_l) = combine_train_test(
    known_train, unknown_train, mirror_train,
    known_test, unknown_test, mirror_test,
)

# ---- MIRROR UNKNOWN: known vs. mirror samples ----
name = '25_M_n'
# Fit the GBDT with the best hyperparams on the whole combined train set
gbdt = GradientBoostingClassifier(
    random_state=0,
    learning_rate=0.1,
    max_depth=1,
    min_samples_leaf=1,
).fit(mirror_unknown_train, mirror_unknown_train_l)
# Persist the fitted model
dump(gbdt, f'Models/{name}_GBDT')
# Likelihood arrays for the known bin and the mirror (unknown) bin
mirror_known_bins, mirror_unknown_bins = label_bins(gbdt, mirror_train, known_train)
print(f"Mirror Known Likelihoods: {mirror_known_bins}")
print(f"Mirror Unknown Likelihoods: {mirror_unknown_bins}")
np.save(f"Models/{name}_Known_Bin_Ls", mirror_known_bins)
np.save(f"Models/{name}_Unknown_Bin_Ls", mirror_unknown_bins)


# ---- ALL UNKNOWN: known vs. every unknown sample ----
name = '25_U_n'
gbdt = GradientBoostingClassifier(
    random_state=0,
    learning_rate=0.5,
    max_depth=4,
    min_samples_leaf=8,
).fit(all_unknown_train, all_unknown_train_l)
# Persist the fitted model
dump(gbdt, f'Models/{name}_GBDT')
# Likelihood arrays for the known bin and the all-unknown bin
u_known_bins, u_unknown_bins = label_bins(gbdt, unknown_train, known_train)
print(f"All Unknown Known Likelihoods: {u_known_bins}")
print(f"All Unknown Unknown Likelihoods: {u_unknown_bins}")
np.save(f"Models/{name}_Known_Bin_Ls", u_known_bins)
np.save(f"Models/{name}_Unknown_Bin_Ls", u_unknown_bins)

Mirror Known Likelihoods: [0. 1.]
Mirror Unknown Likelihoods: [1. 0.]
All Unknown Known Likelihoods: [1.73008865e-04 9.99826991e-01]
All Unknown Unknown Likelihoods: [9.99522578e-01 4.77422034e-04]


In [6]:
############################# 100 PACKET SUBFLOWS - TREE
# Naive features, 100-packet subflows: fit one decision tree per unknown set
# (mirror only, then all unknown), save each model under Models/, and store the
# label likelihoods of the known/unknown bins next to it.
N = 100
(known_train, unknown_train, mirror_train,
 known_test, unknown_test, mirror_test) = load_naive_train_test(N)
(all_unknown_train, all_unknown_train_l,
 all_unknown_test, all_unknown_test_l,
 mirror_unknown_train, mirror_unknown_train_l,
 mirror_unknown_test, mirror_unknown_test_l) = combine_train_test(
    known_train, unknown_train, mirror_train,
    known_test, unknown_test, mirror_test,
)

# MIRROR UNKNOWN
name = f'{N}_M_Tree'
# Train the decision tree on the full known + mirror train set.
# random_state is fixed (as in the GBDT cells) so that re-running this cell
# reproduces the same saved model instead of depending on an unseeded RNG.
tree = DecisionTreeClassifier(max_depth=20, min_samples_split=5, random_state=0)\
    .fit(mirror_unknown_train, mirror_unknown_train_l)
# Saving model
dump(tree, f'Models/{name}')
# Get known and unknown bins and likelihoods.
# NOTE(review): these are measured on the same train arrays the tree was fit on;
# the *_test arrays loaded above are not used in this cell.
mirror_known_bins, mirror_unknown_bins = label_bins(tree, mirror_train, known_train)
print(f"Mirror Known Likelihoods: {mirror_known_bins}")
print(f"Mirror Unknown Likelihoods: {mirror_unknown_bins}")
np.save(f"Models/{name}_Known_Bin_Ls", mirror_known_bins)
np.save(f"Models/{name}_Unknown_Bin_Ls", mirror_unknown_bins)


# ALL UNKNOWN
name = f'{N}_U_Tree'
# Same tree settings and seed, fit on the full known + all-unknown train set.
tree = DecisionTreeClassifier(max_depth=20, min_samples_split=5, random_state=0)\
    .fit(all_unknown_train, all_unknown_train_l)
# Saving model
dump(tree, f'Models/{name}')
# Get known and unknown bins and likelihoods
u_known_bins, u_unknown_bins = label_bins(tree, unknown_train, known_train)
print(f"All Unknown Known Likelihoods: {u_known_bins}")
print(f"All Unknown Unknown Likelihoods: {u_unknown_bins}")
np.save(f"Models/{name}_Known_Bin_Ls", u_known_bins)
np.save(f"Models/{name}_Unknown_Bin_Ls", u_unknown_bins)

Mirror Known Likelihoods: [0. 1.]
Mirror Unknown Likelihoods: [1. 0.]
All Unknown Known Likelihoods: [3.28536698e-05 9.99967146e-01]
All Unknown Unknown Likelihoods: [9.99944300e-01 5.56996907e-05]


In [40]:
############################# 100 PACKET SUBFLOWS - GBDT
# Naive features, 100-packet subflows: fit one GBDT per unknown set, save it,
# and record how the fitted model labels the known and unknown train bins.
N = 100
(known_train, unknown_train, mirror_train,
 known_test, unknown_test, mirror_test) = load_naive_train_test(N)
(all_unknown_train, all_unknown_train_l,
 all_unknown_test, all_unknown_test_l,
 mirror_unknown_train, mirror_unknown_train_l,
 mirror_unknown_test, mirror_unknown_test_l) = combine_train_test(
    known_train, unknown_train, mirror_train,
    known_test, unknown_test, mirror_test,
)

# ---- MIRROR UNKNOWN: known vs. mirror samples ----
name = '100_M_n'
# Fit the GBDT on the whole combined train set
# TODO: SWITCH TO BEST HYPERPARAMS
gbdt = GradientBoostingClassifier(
    random_state=0,
    learning_rate=0.1,
    max_depth=1,
    min_samples_leaf=1,
).fit(mirror_unknown_train, mirror_unknown_train_l)
# Persist the fitted model
dump(gbdt, f'Models/{name}_GBDT')
# Likelihood arrays for the known bin and the mirror (unknown) bin
mirror_known_bins, mirror_unknown_bins = label_bins(gbdt, mirror_train, known_train)
print(f"Mirror Known Likelihoods: {mirror_known_bins}")
print(f"Mirror Unknown Likelihoods: {mirror_unknown_bins}")
np.save(f"Models/{name}_Known_Bin_Ls", mirror_known_bins)
np.save(f"Models/{name}_Unknown_Bin_Ls", mirror_unknown_bins)


# ---- ALL UNKNOWN: known vs. every unknown sample ----
name = '100_U_n'
gbdt = GradientBoostingClassifier(
    random_state=0,
    learning_rate=0.5,
    max_depth=4,
    min_samples_leaf=8,
).fit(all_unknown_train, all_unknown_train_l)
# Persist the fitted model
dump(gbdt, f'Models/{name}_GBDT')
# Likelihood arrays for the known bin and the all-unknown bin
u_known_bins, u_unknown_bins = label_bins(gbdt, unknown_train, known_train)
print(f"All Unknown Known Likelihoods: {u_known_bins}")
print(f"All Unknown Unknown Likelihoods: {u_unknown_bins}")
np.save(f"Models/{name}_Known_Bin_Ls", u_known_bins)
np.save(f"Models/{name}_Unknown_Bin_Ls", u_unknown_bins)

Mirror Known Likelihoods: [0. 1.]
Mirror Unknown Likelihoods: [1. 0.]
All Unknown Known Likelihoods: [3.72341591e-05 9.99962766e-01]
All Unknown Unknown Likelihoods: [9.99847559e-01 1.52441259e-04]


In [7]:
############################# 1000 PACKET SUBFLOWS - TREE
# Naive features, 1000-packet subflows: fit one decision tree per unknown set
# (mirror only, then all unknown), save each model under Models/, and store the
# label likelihoods of the known/unknown bins next to it.
N = 1000
(known_train, unknown_train, mirror_train,
 known_test, unknown_test, mirror_test) = load_naive_train_test(N)
(all_unknown_train, all_unknown_train_l,
 all_unknown_test, all_unknown_test_l,
 mirror_unknown_train, mirror_unknown_train_l,
 mirror_unknown_test, mirror_unknown_test_l) = combine_train_test(
    known_train, unknown_train, mirror_train,
    known_test, unknown_test, mirror_test,
)

# MIRROR UNKNOWN
name = f'{N}_M_Tree'
# Train the decision tree on the full known + mirror train set.
# random_state is fixed (as in the GBDT cells) so that re-running this cell
# reproduces the same saved model instead of depending on an unseeded RNG.
tree = DecisionTreeClassifier(max_depth=20, min_samples_split=5, random_state=0)\
    .fit(mirror_unknown_train, mirror_unknown_train_l)
# Saving model
dump(tree, f'Models/{name}')
# Get known and unknown bins and likelihoods.
# NOTE(review): these are measured on the same train arrays the tree was fit on;
# the *_test arrays loaded above are not used in this cell.
mirror_known_bins, mirror_unknown_bins = label_bins(tree, mirror_train, known_train)
print(f"Mirror Known Likelihoods: {mirror_known_bins}")
print(f"Mirror Unknown Likelihoods: {mirror_unknown_bins}")
np.save(f"Models/{name}_Known_Bin_Ls", mirror_known_bins)
np.save(f"Models/{name}_Unknown_Bin_Ls", mirror_unknown_bins)


# ALL UNKNOWN
name = f'{N}_U_Tree'
# Same tree settings and seed, fit on the full known + all-unknown train set.
tree = DecisionTreeClassifier(max_depth=20, min_samples_split=5, random_state=0)\
    .fit(all_unknown_train, all_unknown_train_l)
# Saving model
dump(tree, f'Models/{name}')
# Get known and unknown bins and likelihoods
u_known_bins, u_unknown_bins = label_bins(tree, unknown_train, known_train)
print(f"All Unknown Known Likelihoods: {u_known_bins}")
print(f"All Unknown Unknown Likelihoods: {u_unknown_bins}")
np.save(f"Models/{name}_Known_Bin_Ls", u_known_bins)
np.save(f"Models/{name}_Unknown_Bin_Ls", u_unknown_bins)

Mirror Known Likelihoods: [0. 1.]
Mirror Unknown Likelihoods: [1. 0.]
All Unknown Known Likelihoods: [6.57938022e-05 9.99934206e-01]
All Unknown Unknown Likelihoods: [9.99935550e-01 6.44496004e-05]


In [41]:
############################# 1000 PACKET SUBFLOWS - GBDT
# Naive features, 1000-packet subflows: fit one GBDT per unknown set, save it,
# and record how the fitted model labels the known and unknown train bins.
N = 1000
(known_train, unknown_train, mirror_train,
 known_test, unknown_test, mirror_test) = load_naive_train_test(N)
(all_unknown_train, all_unknown_train_l,
 all_unknown_test, all_unknown_test_l,
 mirror_unknown_train, mirror_unknown_train_l,
 mirror_unknown_test, mirror_unknown_test_l) = combine_train_test(
    known_train, unknown_train, mirror_train,
    known_test, unknown_test, mirror_test,
)

# ---- MIRROR UNKNOWN: known vs. mirror samples ----
name = '1000_M_n'
# Fit the GBDT on the whole combined train set
# TODO: SWITCH TO BEST HYPERPARAMS
gbdt = GradientBoostingClassifier(
    random_state=0,
    learning_rate=0.1,
    max_depth=1,
    min_samples_leaf=1,
).fit(mirror_unknown_train, mirror_unknown_train_l)
# Persist the fitted model
dump(gbdt, f'Models/{name}_GBDT')
# Likelihood arrays for the known bin and the mirror (unknown) bin
mirror_known_bins, mirror_unknown_bins = label_bins(gbdt, mirror_train, known_train)
print(f"Mirror Known Likelihoods: {mirror_known_bins}")
print(f"Mirror Unknown Likelihoods: {mirror_unknown_bins}")
np.save(f"Models/{name}_Known_Bin_Ls", mirror_known_bins)
np.save(f"Models/{name}_Unknown_Bin_Ls", mirror_unknown_bins)


# ---- ALL UNKNOWN: known vs. every unknown sample ----
name = '1000_U_n'
# TODO: SWITCH TO BEST HYPERPARAMS
gbdt = GradientBoostingClassifier(
    random_state=0,
    learning_rate=0.5,
    max_depth=4,
    min_samples_leaf=8,
).fit(all_unknown_train, all_unknown_train_l)
# Persist the fitted model
dump(gbdt, f'Models/{name}_GBDT')
# Likelihood arrays for the known bin and the all-unknown bin
u_known_bins, u_unknown_bins = label_bins(gbdt, unknown_train, known_train)
print(f"All Unknown Known Likelihoods: {u_known_bins}")
print(f"All Unknown Unknown Likelihoods: {u_unknown_bins}")
np.save(f"Models/{name}_Known_Bin_Ls", u_known_bins)
np.save(f"Models/{name}_Unknown_Bin_Ls", u_unknown_bins)

Mirror Known Likelihoods: [0. 1.]
Mirror Unknown Likelihoods: [1. 0.]
All Unknown Known Likelihoods: [8.11456894e-04 9.99188543e-01]
All Unknown Unknown Likelihoods: [0.99871101 0.00128899]


### Final Features 

In [34]:
############################# 25 PACKET SUBFLOWS - GBDT TRAIN (FINAL FEATURES ALL BELOW)
# Final features, 25-packet subflows: fit one GBDT per unknown set, save it,
# and record how the fitted model labels the known and unknown train bins.
N = 25
(known_train, unknown_train, mirror_train,
 known_test, unknown_test, mirror_test) = load_final_train_test(N)
(all_unknown_train, all_unknown_train_l,
 all_unknown_test, all_unknown_test_l,
 mirror_unknown_train, mirror_unknown_train_l,
 mirror_unknown_test, mirror_unknown_test_l) = combine_train_test(
    known_train, unknown_train, mirror_train,
    known_test, unknown_test, mirror_test,
)

# ---- MIRROR UNKNOWN: known vs. mirror samples ----
name = '25_M'
# Fit the GBDT with the best hyperparams on the whole combined train set
gbdt = GradientBoostingClassifier(
    random_state=0,
    learning_rate=0.1,
    max_depth=1,
    min_samples_leaf=1,
).fit(mirror_unknown_train, mirror_unknown_train_l)
# Persist the fitted model
dump(gbdt, f'Models/{name}_GBDT')
# Likelihood arrays for the known bin and the mirror (unknown) bin
mirror_known_bins, mirror_unknown_bins = label_bins(gbdt, mirror_train, known_train)
print(f"Mirror Known Likelihoods: {mirror_known_bins}")
print(f"Mirror Unknown Likelihoods: {mirror_unknown_bins}")
np.save(f"Models/{name}_Known_Bin_Ls", mirror_known_bins)
np.save(f"Models/{name}_Unknown_Bin_Ls", mirror_unknown_bins)


# ---- ALL UNKNOWN: known vs. every unknown sample ----
name = '25_U'
gbdt = GradientBoostingClassifier(
    random_state=0,
    learning_rate=0.5,
    max_depth=4,
    min_samples_leaf=8,
).fit(all_unknown_train, all_unknown_train_l)
# Persist the fitted model
dump(gbdt, f'Models/{name}_GBDT')
# Likelihood arrays for the known bin and the all-unknown bin
u_known_bins, u_unknown_bins = label_bins(gbdt, unknown_train, known_train)
print(f"All Unknown Known Likelihoods: {u_known_bins}")
print(f"All Unknown Unknown Likelihoods: {u_unknown_bins}")
np.save(f"Models/{name}_Known_Bin_Ls", u_known_bins)
np.save(f"Models/{name}_Unknown_Bin_Ls", u_unknown_bins)

Mirror Known Likelihoods: [0. 1.]
Mirror Unknown Likelihoods: [1. 0.]
All Unknown Known Likelihoods: [5.47496408e-07 9.99999453e-01]
All Unknown Unknown Likelihoods: [9.99997881e-01 2.11873684e-06]


In [35]:
############################# 100 PACKET SUBFLOWS - GBDT
# Final features, 100-packet subflows: fit one GBDT per unknown set, save it,
# and record how the fitted model labels the known and unknown train bins.
N = 100
(known_train, unknown_train, mirror_train,
 known_test, unknown_test, mirror_test) = load_final_train_test(N)
(all_unknown_train, all_unknown_train_l,
 all_unknown_test, all_unknown_test_l,
 mirror_unknown_train, mirror_unknown_train_l,
 mirror_unknown_test, mirror_unknown_test_l) = combine_train_test(
    known_train, unknown_train, mirror_train,
    known_test, unknown_test, mirror_test,
)

# ---- MIRROR UNKNOWN: known vs. mirror samples ----
name = '100_M'
# Fit the GBDT on the whole combined train set
# TODO: SWITCH TO BEST HYPERPARAMS
gbdt = GradientBoostingClassifier(
    random_state=0,
    learning_rate=0.1,
    max_depth=1,
    min_samples_leaf=1,
).fit(mirror_unknown_train, mirror_unknown_train_l)
# Persist the fitted model
dump(gbdt, f'Models/{name}_GBDT')
# Likelihood arrays for the known bin and the mirror (unknown) bin
mirror_known_bins, mirror_unknown_bins = label_bins(gbdt, mirror_train, known_train)
print(f"Mirror Known Likelihoods: {mirror_known_bins}")
print(f"Mirror Unknown Likelihoods: {mirror_unknown_bins}")
np.save(f"Models/{name}_Known_Bin_Ls", mirror_known_bins)
np.save(f"Models/{name}_Unknown_Bin_Ls", mirror_unknown_bins)


# ---- ALL UNKNOWN: known vs. every unknown sample ----
name = '100_U'
gbdt = GradientBoostingClassifier(
    random_state=0,
    learning_rate=0.5,
    max_depth=4,
    min_samples_leaf=8,
).fit(all_unknown_train, all_unknown_train_l)
# Persist the fitted model
dump(gbdt, f'Models/{name}_GBDT')
# Likelihood arrays for the known bin and the all-unknown bin
u_known_bins, u_unknown_bins = label_bins(gbdt, unknown_train, known_train)
print(f"All Unknown Known Likelihoods: {u_known_bins}")
print(f"All Unknown Unknown Likelihoods: {u_unknown_bins}")
np.save(f"Models/{name}_Known_Bin_Ls", u_known_bins)
np.save(f"Models/{name}_Unknown_Bin_Ls", u_unknown_bins)

Mirror Known Likelihoods: [0. 1.]
Mirror Unknown Likelihoods: [1. 0.]
All Unknown Known Likelihoods: [0.00110607 0.99889393]
All Unknown Unknown Likelihoods: [0.99883617 0.00116383]


In [36]:
############################# 1000 PACKET SUBFLOWS - GBDT
# Final features, 1000-packet subflows: fit one GBDT per unknown set, save it,
# and record how the fitted model labels the known and unknown train bins.
N = 1000
(known_train, unknown_train, mirror_train,
 known_test, unknown_test, mirror_test) = load_final_train_test(N)
(all_unknown_train, all_unknown_train_l,
 all_unknown_test, all_unknown_test_l,
 mirror_unknown_train, mirror_unknown_train_l,
 mirror_unknown_test, mirror_unknown_test_l) = combine_train_test(
    known_train, unknown_train, mirror_train,
    known_test, unknown_test, mirror_test,
)

# ---- MIRROR UNKNOWN: known vs. mirror samples ----
name = '1000_M'
# Fit the GBDT on the whole combined train set
# TODO: SWITCH TO BEST HYPERPARAMS
gbdt = GradientBoostingClassifier(
    random_state=0,
    learning_rate=0.1,
    max_depth=1,
    min_samples_leaf=1,
).fit(mirror_unknown_train, mirror_unknown_train_l)
# Persist the fitted model
dump(gbdt, f'Models/{name}_GBDT')
# Likelihood arrays for the known bin and the mirror (unknown) bin
mirror_known_bins, mirror_unknown_bins = label_bins(gbdt, mirror_train, known_train)
print(f"Mirror Known Likelihoods: {mirror_known_bins}")
print(f"Mirror Unknown Likelihoods: {mirror_unknown_bins}")
np.save(f"Models/{name}_Known_Bin_Ls", mirror_known_bins)
np.save(f"Models/{name}_Unknown_Bin_Ls", mirror_unknown_bins)


# ---- ALL UNKNOWN: known vs. every unknown sample ----
name = '1000_U'
# TODO: SWITCH TO BEST HYPERPARAMS
gbdt = GradientBoostingClassifier(
    random_state=0,
    learning_rate=0.5,
    max_depth=4,
    min_samples_leaf=8,
).fit(all_unknown_train, all_unknown_train_l)
# Persist the fitted model
dump(gbdt, f'Models/{name}_GBDT')
# Likelihood arrays for the known bin and the all-unknown bin
u_known_bins, u_unknown_bins = label_bins(gbdt, unknown_train, known_train)
print(f"All Unknown Known Likelihoods: {u_known_bins}")
print(f"All Unknown Unknown Likelihoods: {u_unknown_bins}")
np.save(f"Models/{name}_Known_Bin_Ls", u_known_bins)
np.save(f"Models/{name}_Unknown_Bin_Ls", u_unknown_bins)

Mirror Known Likelihoods: [0. 1.]
Mirror Unknown Likelihoods: [1. 0.]
All Unknown Known Likelihoods: [0. 1.]
All Unknown Unknown Likelihoods: [1. 0.]
