In [19]:
import warnings
warnings.filterwarnings("ignore")
import tensorflow as tf
import sklearn
from sklearn.model_selection import train_test_split
import numpy as np
from joblib import load
from numpy import genfromtxt
import pickle
import pandas as pd
import os
from tqdm import tqdm

In [3]:
# Import Model
def import_model():
    list_model = []
    path = './models'
    for i in os.listdir(path):
        list_model.append(i)
    print(list_model)
    return list_model

In [4]:
# Import normal Dataset
def import_dataset():
    with open('./Dataset/csv/Attack_merge.csv', newline='') as csvfile:
        rows = pd.read_csv(csvfile,header=None)
        y = rows[10]
        x = rows.drop([10], axis=1)
        X_train, X_test, y_train, y_test = train_test_split(x, y , test_size=0.2)
    return X_train, X_test, y_train, y_test

In [5]:
import itertools
def combinations(list_model, num):
    return [x for x in itertools.combinations(list_model, num)]

In [6]:
def kappa_statistics(pred_labels, y_test):
    ks = 0
    length = len(pred_labels)
    for n in range(len(pred_labels[0])):
        # value_b = ''
        for o in range(0, length):
            if o == 0:
                continue
            elif pred_labels[o][n] != pred_labels[o-1][n]:
                break
            elif o == length-1:
                ks += 1
            else:
                continue
    ks_percentage = ks/len(y_test) * 100
    #print("KAPPA STATS: " + str(ks_percentage))

    return ks_percentage

In [7]:
def double_fault(pred_labels, y_test):
    wrong_gt = ([l for l, m in enumerate(y_test) if m != pred_labels[0][l]])
    df = 0
    length = len(pred_labels)
    for n, o in enumerate(wrong_gt):
        for p in range(0, length):
            if p == 0:
                continue
            elif pred_labels[p][o] != pred_labels[p-1][o]:
                break
            elif p == length-1:
                df += 1
            else:
                continue
    df_percentage = df/len(y_test) * 100
    #print("DOUBLE FAULT: " + str(df_percentage))
    
    return df_percentage

In [17]:
def prediction_label_prob(model, X_test, X_test_ex, mode=0):
    path = str("./models/")
    if model == "CNN.h5" or model == "LSTM.h5":
        clf = tf.keras.models.load_model(path + model)
        prediction = clf.predict(X_test_ex)
        #prediction = clf.predict_proba(X_test_ex)
        prediction_prob = tf.keras.models.load_model(path + model).predict(X_test_ex).flatten()

    elif model == "DNN4.h5":
        prediction = tf.keras.models.load_model(path + model).predict(X_test)
        prediction_prob = tf.keras.models.load_model(path + model).predict(X_test)[0:,1]
        prediction = prediction[0:,1]

    elif model == "SVM.joblib":
        prediction = load(path + model).predict(X_test)
        prediction_prob = load(path + model).predict(X_test)
        #prediction_prob = load(path + model)._predict_proba_lr(X_test)[0:,1]
        
    else:
        prediction = load(path + model).predict(X_test)
        prediction_prob = load(path + model).predict(X_test)

    if mode == 0:
        return np.rint(prediction)
    else:
        return prediction_prob

In [9]:
def voting(good_team, X_test, X_test_ex, y_test):
    #soft voting
    f1_score =[]
    for i in range(len(good_team)):
        #print(good_team[i])
        pred = np.array([prediction_label_prob(model, X_test, X_test_ex, mode=1) for model in good_team[i]])
        pred = np.average(pred, axis=0, weights=None)
        pred_labels = np.rint(pred)
        f1 = sklearn.metrics.f1_score(y_test, pred_labels, average="weighted")
        f1_score.append(f1)
        #print("f1_score",f1_score)

    print(good_team)
    print(f1_score)
    f1_score_max = np.max(f1_score)
    best_team_index = np.argmax(f1_score)

    return good_team[best_team_index], f1_score_max

In [20]:
X_train, X_test, y_train, y_test = import_dataset()
y_train_ex = tf.expand_dims(y_train, axis=1)
X_train_ex = tf.expand_dims(X_train, axis=2)
y_test_ex = tf.expand_dims(y_test, axis=1)
X_test_ex = tf.expand_dims(X_test, axis=2)
good_team = [('CNN.h5', 'SVM.joblib'), ('KNN.joblib', 'LR.joblib'), ('CNN.h5', 'SVM.joblib'), ('LR.joblib', 'XGB.joblib'), ('LSTM.h5', 'SVM.joblib'), ('SVM.joblib', 'XGB.joblib')]
best_team, best_team_f1 = voting(good_team, X_test, X_test_ex, y_test)
print("best_team, best_team_f1", best_team, best_team_f1)


[('CNN.h5', 'SVM.joblib'), ('KNN.joblib', 'LR.joblib'), ('CNN.h5', 'SVM.joblib'), ('LR.joblib', 'XGB.joblib'), ('LSTM.h5', 'SVM.joblib'), ('SVM.joblib', 'XGB.joblib')]
[0.9099044656629341, 0.952384477357377, 0.9099044656629341, 0.952474933752038, 0.8996352426952411, 0.952474933752038]
best_team, best_team_f1 ('LR.joblib', 'XGB.joblib') 0.952474933752038


In [26]:
def ensemble_learning(num_teams):
    good_team = []
    f1_score_team = []
    list_model = import_model()
    X_train, X_test, y_train, y_test = import_dataset()
    y_train_ex = tf.expand_dims(y_train, axis=1)
    X_train_ex = tf.expand_dims(X_train, axis=2)
    y_test_ex = tf.expand_dims(y_test, axis=1)
    X_test_ex = tf.expand_dims(X_test, axis=2)

    combination_model = combinations(list_model=list_model, num = num_teams)
    print(combination_model)
    print("num of combination model", len(combination_model))
    for j in tqdm(range(0, len(combination_model))):
        pred_labels = []
        for k in range(0, len(combination_model[j])):
            pred_labels.append(prediction_label_prob(combination_model[j][k], X_test, X_test_ex, mode=0))
            
        #print(pred_labels)
        # print(combination_model[j])
        # print(f1_inteam)
        ks_percentage = kappa_statistics(pred_labels, y_test)
        df_percentage = double_fault(pred_labels, y_test)
        
        if ks_percentage > 50 and ks_percentage < 91 and df_percentage < 50:
            good_team.append(combination_model[j])
            
    best_team, best_team_f1 = voting(good_team, X_test, X_test_ex, y_test)
    print("best_team, best_team_f1", best_team, best_team_f1)
    



In [23]:
ensemble_learning(2)

['CNN.h5', 'DNN4.h5', 'DT.joblib', 'KNN.joblib', 'LR.joblib', 'LSTM.h5', 'RF.joblib', 'SVM.joblib', 'XGB.joblib']
[('CNN.h5', 'DNN4.h5'), ('CNN.h5', 'DT.joblib'), ('CNN.h5', 'KNN.joblib'), ('CNN.h5', 'LR.joblib'), ('CNN.h5', 'LSTM.h5'), ('CNN.h5', 'RF.joblib'), ('CNN.h5', 'SVM.joblib'), ('CNN.h5', 'XGB.joblib'), ('DNN4.h5', 'DT.joblib'), ('DNN4.h5', 'KNN.joblib'), ('DNN4.h5', 'LR.joblib'), ('DNN4.h5', 'LSTM.h5'), ('DNN4.h5', 'RF.joblib'), ('DNN4.h5', 'SVM.joblib'), ('DNN4.h5', 'XGB.joblib'), ('DT.joblib', 'KNN.joblib'), ('DT.joblib', 'LR.joblib'), ('DT.joblib', 'LSTM.h5'), ('DT.joblib', 'RF.joblib'), ('DT.joblib', 'SVM.joblib'), ('DT.joblib', 'XGB.joblib'), ('KNN.joblib', 'LR.joblib'), ('KNN.joblib', 'LSTM.h5'), ('KNN.joblib', 'RF.joblib'), ('KNN.joblib', 'SVM.joblib'), ('KNN.joblib', 'XGB.joblib'), ('LR.joblib', 'LSTM.h5'), ('LR.joblib', 'RF.joblib'), ('LR.joblib', 'SVM.joblib'), ('LR.joblib', 'XGB.joblib'), ('LSTM.h5', 'RF.joblib'), ('LSTM.h5', 'SVM.joblib'), ('LSTM.h5', 'XGB.joblib'

100%|██████████| 36/36 [03:17<00:00,  5.48s/it]


good_team [('DT.joblib', 'SVM.joblib'), ('KNN.joblib', 'LR.joblib'), ('KNN.joblib', 'SVM.joblib'), ('LR.joblib', 'XGB.joblib'), ('LSTM.h5', 'SVM.joblib'), ('SVM.joblib', 'XGB.joblib')]
[('DT.joblib', 'SVM.joblib'), ('KNN.joblib', 'LR.joblib'), ('KNN.joblib', 'SVM.joblib'), ('LR.joblib', 'XGB.joblib'), ('LSTM.h5', 'SVM.joblib'), ('SVM.joblib', 'XGB.joblib')]
[0.9536798480836417, 0.9536372584264552, 0.9536372584264552, 0.9536372584264552, 0.8987115810705797, 0.9536372584264552]
best_team, best_team_f1 ('DT.joblib', 'SVM.joblib') 0.9536798480836417


In [24]:
ensemble_learning(3)

['CNN.h5', 'DNN4.h5', 'DT.joblib', 'KNN.joblib', 'LR.joblib', 'LSTM.h5', 'RF.joblib', 'SVM.joblib', 'XGB.joblib']
[('CNN.h5', 'DNN4.h5', 'DT.joblib'), ('CNN.h5', 'DNN4.h5', 'KNN.joblib'), ('CNN.h5', 'DNN4.h5', 'LR.joblib'), ('CNN.h5', 'DNN4.h5', 'LSTM.h5'), ('CNN.h5', 'DNN4.h5', 'RF.joblib'), ('CNN.h5', 'DNN4.h5', 'SVM.joblib'), ('CNN.h5', 'DNN4.h5', 'XGB.joblib'), ('CNN.h5', 'DT.joblib', 'KNN.joblib'), ('CNN.h5', 'DT.joblib', 'LR.joblib'), ('CNN.h5', 'DT.joblib', 'LSTM.h5'), ('CNN.h5', 'DT.joblib', 'RF.joblib'), ('CNN.h5', 'DT.joblib', 'SVM.joblib'), ('CNN.h5', 'DT.joblib', 'XGB.joblib'), ('CNN.h5', 'KNN.joblib', 'LR.joblib'), ('CNN.h5', 'KNN.joblib', 'LSTM.h5'), ('CNN.h5', 'KNN.joblib', 'RF.joblib'), ('CNN.h5', 'KNN.joblib', 'SVM.joblib'), ('CNN.h5', 'KNN.joblib', 'XGB.joblib'), ('CNN.h5', 'LR.joblib', 'LSTM.h5'), ('CNN.h5', 'LR.joblib', 'RF.joblib'), ('CNN.h5', 'LR.joblib', 'SVM.joblib'), ('CNN.h5', 'LR.joblib', 'XGB.joblib'), ('CNN.h5', 'LSTM.h5', 'RF.joblib'), ('CNN.h5', 'LSTM.h5'

100%|██████████| 84/84 [11:37<00:00,  8.30s/it]


good_team [('CNN.h5', 'DT.joblib', 'SVM.joblib'), ('CNN.h5', 'KNN.joblib', 'LR.joblib'), ('CNN.h5', 'KNN.joblib', 'SVM.joblib'), ('CNN.h5', 'LR.joblib', 'XGB.joblib'), ('CNN.h5', 'LSTM.h5', 'SVM.joblib'), ('CNN.h5', 'SVM.joblib', 'XGB.joblib'), ('DNN4.h5', 'DT.joblib', 'LR.joblib'), ('DNN4.h5', 'DT.joblib', 'SVM.joblib'), ('DNN4.h5', 'KNN.joblib', 'LR.joblib'), ('DNN4.h5', 'KNN.joblib', 'SVM.joblib'), ('DNN4.h5', 'LR.joblib', 'LSTM.h5'), ('DNN4.h5', 'LR.joblib', 'XGB.joblib'), ('DNN4.h5', 'LSTM.h5', 'SVM.joblib'), ('DNN4.h5', 'SVM.joblib', 'XGB.joblib'), ('DT.joblib', 'KNN.joblib', 'LR.joblib'), ('DT.joblib', 'KNN.joblib', 'SVM.joblib'), ('DT.joblib', 'LR.joblib', 'LSTM.h5'), ('DT.joblib', 'LR.joblib', 'SVM.joblib'), ('DT.joblib', 'LR.joblib', 'XGB.joblib'), ('DT.joblib', 'LSTM.h5', 'SVM.joblib'), ('DT.joblib', 'RF.joblib', 'SVM.joblib'), ('DT.joblib', 'SVM.joblib', 'XGB.joblib'), ('KNN.joblib', 'LR.joblib', 'LSTM.h5'), ('KNN.joblib', 'LR.joblib', 'RF.joblib'), ('KNN.joblib', 'LR.jobli

In [25]:
ensemble_learning(5)

['CNN.h5', 'DNN4.h5', 'DT.joblib', 'KNN.joblib', 'LR.joblib', 'LSTM.h5', 'RF.joblib', 'SVM.joblib', 'XGB.joblib']
[('CNN.h5', 'DNN4.h5', 'DT.joblib', 'KNN.joblib', 'LR.joblib'), ('CNN.h5', 'DNN4.h5', 'DT.joblib', 'KNN.joblib', 'LSTM.h5'), ('CNN.h5', 'DNN4.h5', 'DT.joblib', 'KNN.joblib', 'RF.joblib'), ('CNN.h5', 'DNN4.h5', 'DT.joblib', 'KNN.joblib', 'SVM.joblib'), ('CNN.h5', 'DNN4.h5', 'DT.joblib', 'KNN.joblib', 'XGB.joblib'), ('CNN.h5', 'DNN4.h5', 'DT.joblib', 'LR.joblib', 'LSTM.h5'), ('CNN.h5', 'DNN4.h5', 'DT.joblib', 'LR.joblib', 'RF.joblib'), ('CNN.h5', 'DNN4.h5', 'DT.joblib', 'LR.joblib', 'SVM.joblib'), ('CNN.h5', 'DNN4.h5', 'DT.joblib', 'LR.joblib', 'XGB.joblib'), ('CNN.h5', 'DNN4.h5', 'DT.joblib', 'LSTM.h5', 'RF.joblib'), ('CNN.h5', 'DNN4.h5', 'DT.joblib', 'LSTM.h5', 'SVM.joblib'), ('CNN.h5', 'DNN4.h5', 'DT.joblib', 'LSTM.h5', 'XGB.joblib'), ('CNN.h5', 'DNN4.h5', 'DT.joblib', 'RF.joblib', 'SVM.joblib'), ('CNN.h5', 'DNN4.h5', 'DT.joblib', 'RF.joblib', 'XGB.joblib'), ('CNN.h5', 'DN

100%|██████████| 126/126 [30:04<00:00, 14.32s/it]


good_team [('CNN.h5', 'DNN4.h5', 'DT.joblib', 'KNN.joblib', 'LR.joblib'), ('CNN.h5', 'DNN4.h5', 'DT.joblib', 'KNN.joblib', 'LSTM.h5'), ('CNN.h5', 'DNN4.h5', 'DT.joblib', 'KNN.joblib', 'SVM.joblib'), ('CNN.h5', 'DNN4.h5', 'DT.joblib', 'LR.joblib', 'LSTM.h5'), ('CNN.h5', 'DNN4.h5', 'DT.joblib', 'LR.joblib', 'RF.joblib'), ('CNN.h5', 'DNN4.h5', 'DT.joblib', 'LR.joblib', 'SVM.joblib'), ('CNN.h5', 'DNN4.h5', 'DT.joblib', 'LR.joblib', 'XGB.joblib'), ('CNN.h5', 'DNN4.h5', 'DT.joblib', 'LSTM.h5', 'SVM.joblib'), ('CNN.h5', 'DNN4.h5', 'DT.joblib', 'LSTM.h5', 'XGB.joblib'), ('CNN.h5', 'DNN4.h5', 'DT.joblib', 'RF.joblib', 'SVM.joblib'), ('CNN.h5', 'DNN4.h5', 'DT.joblib', 'SVM.joblib', 'XGB.joblib'), ('CNN.h5', 'DNN4.h5', 'KNN.joblib', 'LR.joblib', 'LSTM.h5'), ('CNN.h5', 'DNN4.h5', 'KNN.joblib', 'LR.joblib', 'RF.joblib'), ('CNN.h5', 'DNN4.h5', 'KNN.joblib', 'LR.joblib', 'SVM.joblib'), ('CNN.h5', 'DNN4.h5', 'KNN.joblib', 'LR.joblib', 'XGB.joblib'), ('CNN.h5', 'DNN4.h5', 'KNN.joblib', 'LSTM.h5', 'RF.j