In [None]:
from google.colab import drive
import pandas as pd
import numpy as np
import tensorflow as tf
import pickle
from tensorflow import keras
from tensorflow.keras import layers, metrics
from tensorflow.keras.metrics import Accuracy
from sklearn import preprocessing
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn import model_selection

# Mount Google Drive at /content/drive; later cells read the dataset from, and
# write pickled models to, paths under drive/MyDrive/.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# LOAD DATASET
# Path of the pre-processed CSV on the mounted Drive.
dataset_csv_path = "drive/MyDrive/Dataset/DataProcess/SYNandUDPLagDataProcessing.csv"
feature_data = pd.read_csv(dataset_csv_path)

  exec(code_obj, self.user_global_ns, self.user_ns)


In [None]:
# ADD LABEL ENCODER
# Learn the class names from the " Label" column, then overwrite that column
# with the corresponding integer codes.
# NOTE: the column is replaced in place, so re-running this cell without
# reloading the CSV would encode the already-encoded integers.
label_encoder = LabelEncoder()
label_encoder.fit(feature_data[" Label"])
feature_data[" Label"] = label_encoder.transform(feature_data[" Label"])

# classes_ holds the original label values, indexed by their integer code.
datas_labels = label_encoder.classes_
print(datas_labels)

['BENIGN' 'Syn' 'UDP' 'UDPLag']


In [None]:
# Build the feature matrix x and the target vector y.
# The dropped columns are index leftovers / identifiers plus the target itself:
# ' Label' must not remain in x, otherwise the encoded class is available to
# the feature selector and the classifiers as an input (target leakage).
non_feature_columns = [
    'Unnamed: 0.1.1', 'Unnamed: 0', 'Unnamed: 0.1', 'SimillarHTTP',
    'Flow ID', ' Source IP', ' Destination IP', ' Timestamp',
    ' Label',
]
x = feature_data.drop(non_feature_columns, axis=1)
y = feature_data[' Label']

# Round the byte rate and replace missing values with 0. The result is assigned
# back to the column: calling fillna(..., inplace=True) on a column selection is
# a chained in-place update that pandas does not guarantee to write through to x.
x['Flow Bytes/s'] = x['Flow Bytes/s'].round().fillna(0)

# Infinite values anywhere in the frame become 0.
x = x.replace([np.inf, -np.inf], 0)

# chi2 (used in the next cell) rejects negative inputs, so keep magnitudes only.
x = x.abs()

In [None]:
from sklearn.feature_selection import chi2
from sklearn.feature_selection import SelectKBest
#APPLY CHI SQUARE TO DATASET FEATURE

# k = 69 keeps the 69 features with the highest chi-square score
# Score function Chi2 tells the feature to be selected using Chi Square
# NOTE(review): the selector is fitted on the full dataset, before the
# train/test split in the next cell, so held-out labels influence which
# features are kept. Consider fitting it on the training split only.
chi_test = SelectKBest(score_func=chi2, k=69)

# fit_transform fits the selector and reduces x in a single pass; a separate
# fit() call beforehand only repeats the same work.
x = chi_test.fit_transform(x, y)

# `fit` used to hold the return value of chi_test.fit(), which is the selector
# itself; keep the name bound in case other cells read it.
fit = chi_test

# Targets as an (n, 1) column vector. np.asarray accepts both a pandas Series
# and an ndarray, so the cell can be re-run without failing on `.values`.
y = np.asarray(y).reshape((-1, 1))

# Last expression of the cell so the resulting shape is actually displayed.
x.shape

In [None]:
# SPLIT DATASET
# Fixed seed so the hold-out split and the CV folds are reproducible.
SEED = 42
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=SEED)

# CHECK THE SHAPE
print(x_train.shape , y_train.shape)
print(x_test.shape , y_test.shape)

# SET TOTAL FOLD
total_fold = 10

# kf.split(x_train) is called once per model in the cells below. With an integer
# random_state every call yields the same partitions, so "fold k" refers to the
# same rows for every classifier; an unseeded shuffle reshuffles on each call.
kf = KFold(n_splits = total_fold, shuffle=True, random_state=SEED)

(75000, 69) (75000, 1)
(25000, 69) (25000, 1)


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier

from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

In [None]:
def get_stacking1():
    """Return an unfitted StackingClassifier with two base learners.

    Base learners: an L1-penalised liblinear LogisticRegression (verbose) and an
    entropy-criterion DecisionTreeClassifier limited to depth 2. Their outputs
    are combined by a LogisticRegression with default settings.
    """
    base_learners = [
        ('lr', LogisticRegression(penalty='l1', solver="liblinear", random_state=1, max_iter=10000, verbose=1)),
        ('cart', DecisionTreeClassifier(criterion="entropy", random_state=1, max_depth=2)),
    ]
    meta_learner = LogisticRegression()
    return StackingClassifier(estimators=base_learners, final_estimator=meta_learner)

def get_stacking2():
    """Return an unfitted StackingClassifier with two base learners.

    Base learners: an L2-penalised liblinear LogisticRegression and a
    gini-criterion DecisionTreeClassifier limited to depth 3. Their outputs are
    combined by a LogisticRegression with default settings.
    """
    base_learners = [
        ('lr', LogisticRegression(penalty='l2', solver="liblinear", random_state=1, max_iter=10000)),
        ('cart', DecisionTreeClassifier(criterion="gini", random_state=1, max_depth=3)),
    ]
    meta_learner = LogisticRegression()
    return StackingClassifier(estimators=base_learners, final_estimator=meta_learner)

In [None]:
# Each entry pairs a display name with the estimator it describes. The two
# parallel lists used by the other cells are derived from it, so index i of
# `models` always matches index i of `classifiers_name`.
# NOTE(review): recent scikit-learn releases deprecate AdaBoost's "SAMME.R"
# option - confirm against the installed version.
model_catalog = [
    ("RandomForest before Tuning", RandomForestClassifier(criterion="gini", max_features='sqrt')),
    ("RandomForest after Tuning", RandomForestClassifier(criterion="entropy", max_features='log2')),
    ("AdaBoost before Tuning", AdaBoostClassifier(algorithm='SAMME', n_estimators=50, learning_rate=1)),
    ("AdaBoost after Tuning", AdaBoostClassifier(algorithm="SAMME.R", n_estimators=100, learning_rate=0.5)),
    ("Stacking before Tuning", get_stacking1()),
    ("Stacking after Tuning", get_stacking2()),
]
models = [estimator for _label, estimator in model_catalog]
classifiers_name = [label for label, _estimator in model_catalog]

In [None]:
#RANDOM FOREST BEFORE TUNING
# K-fold cross-validation of models[0] on x_train: fit on each training split,
# record the validation accuracy (in percent) and pickle the fitted model.

cvscores = []

# enumerate(..., start=1) provides the 1-based fold number used in file names.
for fold_number, (train_index, test_index) in enumerate(kf.split(x_train), start=1):
    x_train_fold, x_test_fold = x_train[train_index], x_train[test_index]
    # Flatten the label slices to 1-D before fitting and scoring.
    y_train_fold = y_train[train_index].reshape((-1))
    y_test_fold = y_train[test_index].reshape((-1))
    model = models[0]
    model.fit(x_train_fold, y_train_fold)
    cvscores.append(model.score(x_test_fold, y_test_fold) * 100)
    fold_name = "drive/MyDrive/Dataset/SavedModels/" + classifiers_name[0] + " fold" + str(fold_number) + ".sav"
    # The context manager closes the file, so the pickle is fully written and
    # no handle is left open for each fold.
    with open(fold_name, 'wb') as fold_file:
        pickle.dump(model, fold_file)
    print("Done fold" + str(fold_number))

print("Model Accuracy List: " , cvscores)
print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))

# `model` is the estimator as fitted on the last fold's training split.
filename = "drive/MyDrive/Dataset/SavedModels/" + classifiers_name[0] + ".sav"
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

Done fold1
Done fold2
Done fold3
Done fold4
Done fold5
Done fold6
Done fold7
Done fold8
Done fold9
Done fold10
Model Accuracy List:  [92.4121212121212, 93.26060606060607, 92.87272727272727, 92.63030303030303, 91.97575757575758, 92.75151515151515, 92.43636363636364, 92.8969696969697, 93.30909090909091, 92.77575757575758]
92.73% (+/- 0.38%)


In [None]:
#RANDOM FOREST AFTER TUNING
# K-fold cross-validation of models[1] on x_train: fit on each training split,
# record the validation accuracy (in percent) and pickle the fitted model.

cvscores = []

# enumerate(..., start=1) provides the 1-based fold number used in file names.
for fold_number, (train_index, test_index) in enumerate(kf.split(x_train), start=1):
    x_train_fold, x_test_fold = x_train[train_index], x_train[test_index]
    # Flatten the label slices to 1-D before fitting and scoring.
    y_train_fold = y_train[train_index].reshape((-1))
    y_test_fold = y_train[test_index].reshape((-1))
    model = models[1]
    model.fit(x_train_fold, y_train_fold)
    cvscores.append(model.score(x_test_fold, y_test_fold) * 100)
    fold_name = "drive/MyDrive/Dataset/SavedModels/" + classifiers_name[1] + " fold" + str(fold_number) + ".sav"
    # The context manager closes the file, so the pickle is fully written and
    # no handle is left open for each fold.
    with open(fold_name, 'wb') as fold_file:
        pickle.dump(model, fold_file)
    print("Done fold" + str(fold_number))

print("Model Accuracy List: " , cvscores)
print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))

# `model` is the estimator as fitted on the last fold's training split.
filename = "drive/MyDrive/Dataset/SavedModels/" + classifiers_name[1] + ".sav"
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

Done fold1
Done fold2
Done fold3
Done fold4
Done fold5
Done fold6
Done fold7
Done fold8
Done fold9
Done fold10
Model Accuracy List:  [98.93333333333332, 98.83636363636363, 99.05454545454545, 98.64242424242424, 98.52121212121212, 98.44848484848485, 97.01818181818182, 97.79393939393938, 98.66666666666667, 98.54545454545455]
98.45% (+/- 0.58%)


In [None]:
#ADABOOST CLASSIFIER BEFORE TUNING
# K-fold cross-validation of models[2] on x_train: fit on each training split,
# record the validation accuracy (in percent) and pickle the fitted model.

cvscores = []

# enumerate(..., start=1) provides the 1-based fold number used in file names.
for fold_number, (train_index, test_index) in enumerate(kf.split(x_train), start=1):
    x_train_fold, x_test_fold = x_train[train_index], x_train[test_index]
    # Flatten the label slices to 1-D before fitting and scoring.
    y_train_fold = y_train[train_index].reshape((-1))
    y_test_fold = y_train[test_index].reshape((-1))
    model = models[2]
    model.fit(x_train_fold, y_train_fold)
    cvscores.append(model.score(x_test_fold, y_test_fold) * 100)
    fold_name = "drive/MyDrive/Dataset/SavedModels/" + classifiers_name[2] + " fold" + str(fold_number) + ".sav"
    # The context manager closes the file, so the pickle is fully written and
    # no handle is left open for each fold.
    with open(fold_name, 'wb') as fold_file:
        pickle.dump(model, fold_file)
    print("Done fold" + str(fold_number))

print("Model Accuracy List: " , cvscores)
print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))

# `model` is the estimator as fitted on the last fold's training split.
filename = "drive/MyDrive/Dataset/SavedModels/" + classifiers_name[2] + ".sav"
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

Done fold1
Done fold2
Done fold3
Done fold4
Done fold5
Done fold6
Done fold7
Done fold8
Done fold9
Done fold10
Model Accuracy List:  [91.32000000000001, 92.14666666666666, 86.29333333333334, 90.17333333333333, 91.64, 91.97333333333333, 84.10666666666667, 91.85333333333332, 91.38666666666667, 92.13333333333334]
90.30% (+/- 2.65%)


In [None]:
#ADABOOST CLASSIFIER AFTER TUNING
# K-fold cross-validation of models[3] on x_train: fit on each training split,
# record the validation accuracy (in percent) and pickle the fitted model.

cvscores = []

# enumerate(..., start=1) provides the 1-based fold number used in file names.
for fold_number, (train_index, test_index) in enumerate(kf.split(x_train), start=1):
    x_train_fold, x_test_fold = x_train[train_index], x_train[test_index]
    # Flatten the label slices to 1-D before fitting and scoring.
    y_train_fold = y_train[train_index].reshape((-1))
    y_test_fold = y_train[test_index].reshape((-1))
    model = models[3]
    model.fit(x_train_fold, y_train_fold)
    cvscores.append(model.score(x_test_fold, y_test_fold) * 100)
    fold_name = "drive/MyDrive/Dataset/SavedModels/" + classifiers_name[3] + " fold" + str(fold_number) + ".sav"
    # The context manager closes the file, so the pickle is fully written and
    # no handle is left open for each fold.
    with open(fold_name, 'wb') as fold_file:
        pickle.dump(model, fold_file)
    print("Done fold" + str(fold_number))

print("Model Accuracy List: " , cvscores)
print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))

# `model` is the estimator as fitted on the last fold's training split.
filename = "drive/MyDrive/Dataset/SavedModels/" + classifiers_name[3] + ".sav"
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

Done fold1
Done fold2
Done fold3
Done fold4
Done fold5
Done fold6
Done fold7
Done fold8
Done fold9
Done fold10
Model Accuracy List:  [93.67999999999999, 95.82666666666667, 95.54666666666667, 96.06666666666666, 92.58666666666666, 95.90666666666667, 95.82666666666667, 90.30666666666667, 96.21333333333332, 93.28]
94.52% (+/- 1.88%)


In [None]:
#STACKING BEFORE TUNING
# K-fold cross-validation of models[4] on x_train: fit on each training split,
# record the validation accuracy (in percent) and pickle the fitted model.

cvscores = []

# enumerate(..., start=1) provides the 1-based fold number used in file names.
for fold_number, (train_index, test_index) in enumerate(kf.split(x_train), start=1):
    x_train_fold, x_test_fold = x_train[train_index], x_train[test_index]
    # Flatten the label slices to 1-D before fitting and scoring.
    y_train_fold = y_train[train_index].reshape((-1))
    y_test_fold = y_train[test_index].reshape((-1))
    model = models[4]
    model.fit(x_train_fold, y_train_fold)
    cvscores.append(model.score(x_test_fold, y_test_fold) * 100)
    fold_name = "drive/MyDrive/Dataset/SavedModels/" + classifiers_name[4] + " fold" + str(fold_number) + ".sav"
    # The context manager closes the file, so the pickle is fully written and
    # no handle is left open for each fold.
    with open(fold_name, 'wb') as fold_file:
        pickle.dump(model, fold_file)
    print("Done fold" + str(fold_number))

print("Model Accuracy List: " , cvscores)
print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))

# `model` is the estimator as fitted on the last fold's training split.
filename = "drive/MyDrive/Dataset/SavedModels/" + classifiers_name[4] + ".sav"
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

Done fold1
Done fold2
Done fold3
Done fold4
Done fold5
Done fold6
Done fold7
Done fold8
Done fold9
Done fold10
Model Accuracy List:  [91.74666666666667, 92.50666666666667, 92.69333333333334, 86.50666666666666, 93.22666666666667, 93.25333333333333, 91.75999999999999, 84.08, 90.57333333333332, 72.14666666666668]
88.85% (+/- 6.28%)


In [None]:
#STACKING AFTER TUNING
# K-fold cross-validation of models[5] on x_train: fit on each training split,
# record the validation accuracy (in percent) and pickle the fitted model.

cvscores = []

# enumerate(..., start=1) provides the 1-based fold number used in file names.
for fold_number, (train_index, test_index) in enumerate(kf.split(x_train), start=1):
    x_train_fold, x_test_fold = x_train[train_index], x_train[test_index]
    # Flatten the label slices to 1-D before fitting and scoring.
    y_train_fold = y_train[train_index].reshape((-1))
    y_test_fold = y_train[test_index].reshape((-1))
    model = models[5]
    model.fit(x_train_fold, y_train_fold)
    cvscores.append(model.score(x_test_fold, y_test_fold) * 100)
    fold_name = "drive/MyDrive/Dataset/SavedModels/" + classifiers_name[5] + " fold" + str(fold_number) + ".sav"
    # The context manager closes the file, so the pickle is fully written and
    # no handle is left open for each fold.
    with open(fold_name, 'wb') as fold_file:
        pickle.dump(model, fold_file)
    print("Done fold" + str(fold_number))

print("Model Accuracy List: " , cvscores)
print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))

# `model` is the estimator as fitted on the last fold's training split.
filename = "drive/MyDrive/Dataset/SavedModels/" + classifiers_name[5] + ".sav"
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

Done fold1
Done fold2
Done fold3
Done fold4
Done fold5
Done fold6
Done fold7
Done fold8
Done fold9
Done fold10
Model Accuracy List:  [97.24000000000001, 96.96000000000001, 96.09333333333333, 97.06666666666666, 97.09333333333333, 97.62666666666667, 97.68, 97.84, 97.24000000000001, 97.62666666666667]
97.25% (+/- 0.48%)


In [None]:
#TRAINING DATA SCORES OF RANDOM FOREST
# Reload every per-fold Random Forest pickle, predict on the whole of x_train
# and build one classification_report dict per fold.
# NOTE: pickle.load runs code stored in the file; only load pickles written by
# this notebook.

# --- before tuning (classifiers_name[0]) ---
file_randomforest_beforetuning_data = [
    "drive/MyDrive/Dataset/SavedModels/" + classifiers_name[0] + " fold" + str(fold_no) + ".sav"
    for fold_no in range(1, total_fold + 1)
]
loaded_randomforest_beforetuning_data = []
for model_path in file_randomforest_beforetuning_data:
    # `with` closes each pickle file as soon as it has been read.
    with open(model_path, "rb") as model_file:
        loaded_randomforest_beforetuning_data.append(pickle.load(model_file))
y_pred_randomforest_beforetuning_data = [fold_model.predict(x_train) for fold_model in loaded_randomforest_beforetuning_data]
# Predictions reshaped to a single column.
y_pred_values_randomforest_beforetuning_data = [np.reshape(pred, (-1, 1)) for pred in y_pred_randomforest_beforetuning_data]
result_randomforest_beforetuning_data = [
    classification_report(y_train, pred, output_dict=True, zero_division=0)
    for pred in y_pred_values_randomforest_beforetuning_data
]

# --- after tuning (classifiers_name[1]) ---
file_randomforest_aftertuning_data = [
    "drive/MyDrive/Dataset/SavedModels/" + classifiers_name[1] + " fold" + str(fold_no) + ".sav"
    for fold_no in range(1, total_fold + 1)
]
loaded_randomforest_aftertuning_data = []
for model_path in file_randomforest_aftertuning_data:
    with open(model_path, "rb") as model_file:
        loaded_randomforest_aftertuning_data.append(pickle.load(model_file))
y_pred_randomforest_aftertuning_data = [fold_model.predict(x_train) for fold_model in loaded_randomforest_aftertuning_data]
y_pred_values_randomforest_aftertuning_data = [np.reshape(pred, (-1, 1)) for pred in y_pred_randomforest_aftertuning_data]
# zero_division=0 matches the "before tuning" report above: a class that is
# never predicted scores 0 without emitting an undefined-metric warning.
result_randomforest_aftertuning_data = [
    classification_report(y_train, pred, output_dict=True, zero_division=0)
    for pred in y_pred_values_randomforest_aftertuning_data
]

In [None]:
#TRAINING DATA SCORES OF ADABOOST
# Reload every per-fold AdaBoost pickle, predict on the whole of x_train and
# build one classification_report dict per fold.
# NOTE: pickle.load runs code stored in the file; only load pickles written by
# this notebook.

# --- before tuning (classifiers_name[2]) ---
file_adaboost_beforetuning_data = [
    "drive/MyDrive/Dataset/SavedModels/" + classifiers_name[2] + " fold" + str(fold_no) + ".sav"
    for fold_no in range(1, total_fold + 1)
]
loaded_adaboost_beforetuning_data = []
for model_path in file_adaboost_beforetuning_data:
    # `with` closes each pickle file as soon as it has been read.
    with open(model_path, "rb") as model_file:
        loaded_adaboost_beforetuning_data.append(pickle.load(model_file))
y_pred_adaboost_beforetuning_data = [fold_model.predict(x_train) for fold_model in loaded_adaboost_beforetuning_data]
# Predictions reshaped to a single column.
y_pred_values_adaboost_beforetuning_data = [np.reshape(pred, (-1, 1)) for pred in y_pred_adaboost_beforetuning_data]
# zero_division=0: a class that is never predicted scores 0 without emitting an
# undefined-metric warning, consistent with the Random Forest and Stacking cells.
result_adaboost_beforetuning_data = [
    classification_report(y_train, pred, output_dict=True, zero_division=0)
    for pred in y_pred_values_adaboost_beforetuning_data
]

# --- after tuning (classifiers_name[3]) ---
file_adaboost_aftertuning_data = [
    "drive/MyDrive/Dataset/SavedModels/" + classifiers_name[3] + " fold" + str(fold_no) + ".sav"
    for fold_no in range(1, total_fold + 1)
]
loaded_adaboost_aftertuning_data = []
for model_path in file_adaboost_aftertuning_data:
    with open(model_path, "rb") as model_file:
        loaded_adaboost_aftertuning_data.append(pickle.load(model_file))
y_pred_adaboost_aftertuning_data = [fold_model.predict(x_train) for fold_model in loaded_adaboost_aftertuning_data]
y_pred_values_adaboost_aftertuning_data = [np.reshape(pred, (-1, 1)) for pred in y_pred_adaboost_aftertuning_data]
result_adaboost_aftertuning_data = [
    classification_report(y_train, pred, output_dict=True, zero_division=0)
    for pred in y_pred_values_adaboost_aftertuning_data
]

In [None]:
#TRAINING DATA SCORES OF STACKING
# Reload every per-fold Stacking pickle, predict on the whole of x_train and
# build one classification_report dict per fold.
# NOTE: pickle.load runs code stored in the file; only load pickles written by
# this notebook.

# --- before tuning (classifiers_name[4]) ---
file_stacking_beforetuning_data = [
    "drive/MyDrive/Dataset/SavedModels/" + classifiers_name[4] + " fold" + str(fold_no) + ".sav"
    for fold_no in range(1, total_fold + 1)
]
loaded_stacking_beforetuning_data = []
for model_path in file_stacking_beforetuning_data:
    # `with` closes each pickle file as soon as it has been read.
    with open(model_path, "rb") as model_file:
        loaded_stacking_beforetuning_data.append(pickle.load(model_file))
y_pred_stacking_beforetuning_data = [fold_model.predict(x_train) for fold_model in loaded_stacking_beforetuning_data]
# Predictions reshaped to a single column.
y_pred_values_stacking_beforetuning_data = [np.reshape(pred, (-1, 1)) for pred in y_pred_stacking_beforetuning_data]
result_stacking_beforetuning_data = [
    classification_report(y_train, pred, output_dict=True, zero_division=0)
    for pred in y_pred_values_stacking_beforetuning_data
]

# --- after tuning (classifiers_name[5]) ---
file_stacking_aftertuning_data = [
    "drive/MyDrive/Dataset/SavedModels/" + classifiers_name[5] + " fold" + str(fold_no) + ".sav"
    for fold_no in range(1, total_fold + 1)
]
loaded_stacking_aftertuning_data = []
for model_path in file_stacking_aftertuning_data:
    with open(model_path, "rb") as model_file:
        loaded_stacking_aftertuning_data.append(pickle.load(model_file))
y_pred_stacking_aftertuning_data = [fold_model.predict(x_train) for fold_model in loaded_stacking_aftertuning_data]
y_pred_values_stacking_aftertuning_data = [np.reshape(pred, (-1, 1)) for pred in y_pred_stacking_aftertuning_data]
result_stacking_aftertuning_data = [
    classification_report(y_train, pred, output_dict=True, zero_division=0)
    for pred in y_pred_values_stacking_aftertuning_data
]

In [None]:
# Empty containers for the training-data metrics, one per metric and tuning
# stage. Each name gets its own dict object.
training_precision_list_before, training_precision_list_after = dict(), dict()
training_recall_list_before, training_recall_list_after = dict(), dict()
training_accuracy_list_before, training_accuracy_list_after = dict(), dict()
training_f1_list_before, training_f1_list_after = dict(), dict()

In [None]:
# Copy the headline numbers out of the per-fold classification reports.
# Every dict maps the 1-based fold number to [random forest, adaboost, stacking].
for fold_idx in range(total_fold):
    fold_no = fold_idx + 1
    reports_before = (
        result_randomforest_beforetuning_data[fold_idx],
        result_adaboost_beforetuning_data[fold_idx],
        result_stacking_beforetuning_data[fold_idx],
    )
    reports_after = (
        result_randomforest_aftertuning_data[fold_idx],
        result_adaboost_aftertuning_data[fold_idx],
        result_stacking_aftertuning_data[fold_idx],
    )

    # Precision, recall and F1 come from the 'macro avg' entry of each report.
    training_precision_list_before[fold_no] = [rep['macro avg']['precision'] for rep in reports_before]
    training_precision_list_after[fold_no] = [rep['macro avg']['precision'] for rep in reports_after]
    training_recall_list_before[fold_no] = [rep['macro avg']['recall'] for rep in reports_before]
    training_recall_list_after[fold_no] = [rep['macro avg']['recall'] for rep in reports_after]
    # Accuracy is a top-level entry of the report dict.
    training_accuracy_list_before[fold_no] = [rep['accuracy'] for rep in reports_before]
    training_accuracy_list_after[fold_no] = [rep['accuracy'] for rep in reports_after]
    training_f1_list_before[fold_no] = [rep['macro avg']['f1-score'] for rep in reports_before]
    training_f1_list_after[fold_no] = [rep['macro avg']['f1-score'] for rep in reports_after]

In [None]:
# Print one fixed-width table per metric and tuning stage: a title line, a
# header row, one row per fold (scores to three decimals) and two blank lines.
table_row_format = "{:<8} {:<15} {:<10} {:<10}"
training_tables = [
    ("Precision K-Fold Training Data Before Tuning", training_precision_list_before),
    ("Precision K-Fold Training Data After Tuning", training_precision_list_after),
    ("Recall K-Fold Training Data Before Tuning", training_recall_list_before),
    ("Recall K-Fold Training Data After Tuning", training_recall_list_after),
    ("Accuracy K-Fold Training Data Before Tuning", training_accuracy_list_before),
    ("Accuracy K-Fold  Training Data After Tuning", training_accuracy_list_after),
    ("F1-Score K-Fold Training Data Before Tuning", training_f1_list_before),
    ("F1-Score K-Fold  Training Data After Tuning", training_f1_list_after),
]

for table_title, scores_by_fold in training_tables:
    print("{:<1} {:<0}".format("", table_title))
    print(table_row_format.format('Fold', 'Random Forest', 'Adaboost', 'Stacking'))
    for k, v in scores_by_fold.items():
        # v holds the three scores in the order of the header columns.
        random_forest_fold_data, adaboost_fold_data, stacking_fold_data = v
        print(table_row_format.format(
            k,
            format(random_forest_fold_data, ".3f"),
            format(adaboost_fold_data, ".3f"),
            format(stacking_fold_data, ".3f"),
        ))
    print("")
    print("")

  Precision K-Fold Training Data Before Tuning
Fold     Random Forest   Adaboost   Stacking  
1        0.710           0.668      0.654     
2        0.710           0.677      0.647     
3        0.711           0.707      0.647     
4        0.715           0.578      0.718     
5        0.666           0.652      0.679     
6        0.710           0.647      0.667     
7        0.686           0.482      0.637     
8        0.710           0.645      0.488     
9        0.711           0.672      0.656     
10       0.710           0.667      0.469     


  Precision K-Fold Training Data After Tuning
Fold     Random Forest   Adaboost   Stacking  
1        0.849           0.607      0.715     
2        0.976           0.735      0.714     
3        0.873           0.793      0.709     
4        0.981           0.695      0.715     
5        0.973           0.759      0.711     
6        0.870           0.808      0.716     
7        0.871           0.803      0.716     
8        0.8

In [None]:
#TEST DATA SCORES OF RANDOM FOREST
# Reload every per-fold Random Forest pickle, predict on x_test and build one
# classification_report dict per fold.
# NOTE: pickle.load runs code stored in the file; only load pickles written by
# this notebook.

# --- before tuning (classifiers_name[0]) ---
file_randomforest_beforetuning_data = [
    "drive/MyDrive/Dataset/SavedModels/" + classifiers_name[0] + " fold" + str(fold_no) + ".sav"
    for fold_no in range(1, total_fold + 1)
]
loaded_randomforest_beforetuning_data = []
for model_path in file_randomforest_beforetuning_data:
    # `with` closes each pickle file as soon as it has been read.
    with open(model_path, "rb") as model_file:
        loaded_randomforest_beforetuning_data.append(pickle.load(model_file))
y_pred_randomforest_beforetuning_data = [fold_model.predict(x_test) for fold_model in loaded_randomforest_beforetuning_data]
# Predictions reshaped to a single column.
y_pred_values_randomforest_beforetuning_data = [np.reshape(pred, (-1, 1)) for pred in y_pred_randomforest_beforetuning_data]
result_randomforest_beforetuning_data = [
    classification_report(y_test, pred, output_dict=True, zero_division=0)
    for pred in y_pred_values_randomforest_beforetuning_data
]

# --- after tuning (classifiers_name[1]) ---
file_randomforest_aftertuning_data = [
    "drive/MyDrive/Dataset/SavedModels/" + classifiers_name[1] + " fold" + str(fold_no) + ".sav"
    for fold_no in range(1, total_fold + 1)
]
loaded_randomforest_aftertuning_data = []
for model_path in file_randomforest_aftertuning_data:
    with open(model_path, "rb") as model_file:
        loaded_randomforest_aftertuning_data.append(pickle.load(model_file))
y_pred_randomforest_aftertuning_data = [fold_model.predict(x_test) for fold_model in loaded_randomforest_aftertuning_data]
y_pred_values_randomforest_aftertuning_data = [np.reshape(pred, (-1, 1)) for pred in y_pred_randomforest_aftertuning_data]
# zero_division=0 matches the "before tuning" report above: a class that is
# never predicted scores 0 without emitting an undefined-metric warning.
result_randomforest_aftertuning_data = [
    classification_report(y_test, pred, output_dict=True, zero_division=0)
    for pred in y_pred_values_randomforest_aftertuning_data
]

In [None]:
#TEST DATA SCORES OF ADABOOST
# Reload every per-fold AdaBoost pickle, predict on x_test and build one
# classification_report dict per fold.
# NOTE: pickle.load runs code stored in the file; only load pickles written by
# this notebook.

# --- before tuning (classifiers_name[2]) ---
file_adaboost_beforetuning_data = [
    "drive/MyDrive/Dataset/SavedModels/" + classifiers_name[2] + " fold" + str(fold_no) + ".sav"
    for fold_no in range(1, total_fold + 1)
]
loaded_adaboost_beforetuning_data = []
for model_path in file_adaboost_beforetuning_data:
    # `with` closes each pickle file as soon as it has been read.
    with open(model_path, "rb") as model_file:
        loaded_adaboost_beforetuning_data.append(pickle.load(model_file))
y_pred_adaboost_beforetuning_data = [fold_model.predict(x_test) for fold_model in loaded_adaboost_beforetuning_data]
# Predictions reshaped to a single column.
y_pred_values_adaboost_beforetuning_data = [np.reshape(pred, (-1, 1)) for pred in y_pred_adaboost_beforetuning_data]
# zero_division=0: a class that is never predicted scores 0 without emitting an
# undefined-metric warning, consistent with the Random Forest and Stacking cells.
result_adaboost_beforetuning_data = [
    classification_report(y_test, pred, output_dict=True, zero_division=0)
    for pred in y_pred_values_adaboost_beforetuning_data
]

# --- after tuning (classifiers_name[3]) ---
file_adaboost_aftertuning_data = [
    "drive/MyDrive/Dataset/SavedModels/" + classifiers_name[3] + " fold" + str(fold_no) + ".sav"
    for fold_no in range(1, total_fold + 1)
]
loaded_adaboost_aftertuning_data = []
for model_path in file_adaboost_aftertuning_data:
    with open(model_path, "rb") as model_file:
        loaded_adaboost_aftertuning_data.append(pickle.load(model_file))
y_pred_adaboost_aftertuning_data = [fold_model.predict(x_test) for fold_model in loaded_adaboost_aftertuning_data]
y_pred_values_adaboost_aftertuning_data = [np.reshape(pred, (-1, 1)) for pred in y_pred_adaboost_aftertuning_data]
result_adaboost_aftertuning_data = [
    classification_report(y_test, pred, output_dict=True, zero_division=0)
    for pred in y_pred_values_adaboost_aftertuning_data
]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
#TEST DATA SCORES OF STACKING
# For every fold, load the pickled stacking model (before and after tuning),
# predict on the held-out test split and keep one classification_report dict per fold.
# NOTE(review): pickle.load can execute arbitrary code - only load model files produced by this project.

# --- before tuning (classifiers_name[4]) ---
file_stacking_beforetuning_data = ["drive/MyDrive/Dataset/SavedModels/" + classifiers_name[4] + " fold" + str(fn+1) + ".sav" for fn in range(total_fold)]
loaded_stacking_beforetuning_data = []
for model_path in file_stacking_beforetuning_data:
  # the context manager closes the file handle as soon as the model is unpickled
  with open(model_path, "rb") as model_file:
    loaded_stacking_beforetuning_data.append(pickle.load(model_file))
y_pred_stacking_beforetuning_data = [fold_model.predict(x_test) for fold_model in loaded_stacking_beforetuning_data]
# column-vector copies of the predictions (the raw predictions above are left untouched)
y_pred_values_stacking_beforetuning_data = [np.reshape(fold_pred, (-1,1)) for fold_pred in y_pred_stacking_beforetuning_data]
# zero_division=0: classes that are never predicted score 0 without raising a warning
result_stacking_beforetuning_data = [classification_report(y_test, fold_pred, output_dict=True, zero_division=0) for fold_pred in y_pred_values_stacking_beforetuning_data]

# --- after tuning (classifiers_name[5]) ---
file_stacking_aftertuning_data = ["drive/MyDrive/Dataset/SavedModels/" + classifiers_name[5] + " fold" + str(fn+1) + ".sav" for fn in range(total_fold)]
loaded_stacking_aftertuning_data = []
for model_path in file_stacking_aftertuning_data:
  with open(model_path, "rb") as model_file:
    loaded_stacking_aftertuning_data.append(pickle.load(model_file))
y_pred_stacking_aftertuning_data = [fold_model.predict(x_test) for fold_model in loaded_stacking_aftertuning_data]
y_pred_values_stacking_aftertuning_data = [np.reshape(fold_pred, (-1,1)) for fold_pred in y_pred_stacking_aftertuning_data]
result_stacking_aftertuning_data = [classification_report(y_test, fold_pred, output_dict=True, zero_division=0) for fold_pred in y_pred_values_stacking_aftertuning_data]

In [None]:
# Empty per-fold score tables for the test split, one pair (before / after tuning) per metric.
# Each dict starts empty; the loop in the following cell adds one entry per fold.
test_precision_list_before, test_precision_list_after = {}, {}
test_recall_list_before, test_recall_list_after = {}, {}
test_accuracy_list_before, test_accuracy_list_after = {}, {}
test_f1_list_before, test_f1_list_after = {}, {}

In [None]:
# Fill the per-fold score tables from the classification_report dicts.
# Key: 1-based fold number. Value: [random forest, adaboost, stacking] score for that fold.
for scr in range(total_fold):
  fold_no = scr + 1
  # reports of the three classifiers for this fold, always in the same column order
  before_reports = (result_randomforest_beforetuning_data[scr], result_adaboost_beforetuning_data[scr], result_stacking_beforetuning_data[scr])
  after_reports = (result_randomforest_aftertuning_data[scr], result_adaboost_aftertuning_data[scr], result_stacking_aftertuning_data[scr])

  test_precision_list_before[fold_no] = [report['macro avg']['precision'] for report in before_reports]
  test_precision_list_after[fold_no] = [report['macro avg']['precision'] for report in after_reports]
  test_recall_list_before[fold_no] = [report['macro avg']['recall'] for report in before_reports]
  test_recall_list_after[fold_no] = [report['macro avg']['recall'] for report in after_reports]
  # 'accuracy' is a top-level key of the report dict, not part of 'macro avg'
  test_accuracy_list_before[fold_no] = [report['accuracy'] for report in before_reports]
  test_accuracy_list_after[fold_no] = [report['accuracy'] for report in after_reports]
  # every column reads 'f1-score' (the stacking column used to read 'precision' by mistake)
  test_f1_list_before[fold_no] = [report['macro avg']['f1-score'] for report in before_reports]
  test_f1_list_after[fold_no] = [report['macro avg']['f1-score'] for report in after_reports]

In [None]:
# Print one fixed-width table per metric and tuning stage:
# a title line, a header line, one row per fold (scores with 3 decimals), then two blank lines.
row_layout = "{:<8} {:<15} {:<10} {:<10}"
metric_tables = [
    ("Precision K-Fold Test Data Before Tuning", test_precision_list_before),
    ("Precision K-Fold Test Data After Tuning", test_precision_list_after),
    ("Recall K-Fold Test Data Before Tuning", test_recall_list_before),
    ("Recall K-Fold Test Data After Tuning", test_recall_list_after),
    ("Accuracy K-Fold Test Data Before Tuning", test_accuracy_list_before),
    ("Accuracy K-Fold  Test Data After Tuning", test_accuracy_list_after),
    ("F-1Score K-Fold Test Data Before Tuning", test_f1_list_before),
    ("F-1Score K-Fold  Test Data After Tuning", test_f1_list_after),
]

for table_title, fold_scores in metric_tables:
    print("{:<1} {:<0}".format("", table_title))
    print (row_layout.format('Fold','Random Forest','Adaboost','Stacking'))
    for k, v in fold_scores.items():
        # each value holds the scores in the order random forest, adaboost, stacking
        random_forest_fold_data, adaboost_fold_data, stacking_fold_data = v
        print (row_layout.format(
            k,
            format(random_forest_fold_data, ".3f"),
            format(adaboost_fold_data, ".3f"),
            format(stacking_fold_data, ".3f"),
        ))
    print("")
    print("")

  Precision K-Fold Test Data Before Tuning
Fold     Random Forest   Adaboost   Stacking  
1        0.709           0.661      0.656     
2        0.710           0.677      0.638     
3        0.709           0.701      0.643     
4        0.714           0.577      0.712     
5        0.666           0.646      0.677     
6        0.709           0.646      0.664     
7        0.686           0.475      0.627     
8        0.709           0.640      0.483     
9        0.710           0.668      0.654     
10       0.709           0.667      0.468     


  Precision K-Fold Test Data After Tuning
Fold     Random Forest   Adaboost   Stacking  
1        0.847           0.597      0.715     
2        0.973           0.749      0.714     
3        0.872           0.785      0.708     
4        0.980           0.690      0.715     
5        0.971           0.754      0.710     
6        0.869           0.796      0.716     
7        0.869           0.784      0.715     
8        0.841      

In [None]:
# Decode the integer-encoded test labels back to their class names for the
# confusion matrices and reports below.
# The dtype guard makes the cell safe to re-run: once y_test holds names, decoding it again would raise.
if np.issubdtype(np.asarray(y_test).dtype, np.integer):
  # flatten first: a column vector makes inverse_transform emit a DataConversionWarning
  y_test = label_encoder.inverse_transform(np.ravel(y_test))

  y = column_or_1d(y, warn=True)


In [None]:
#Adaboost Folds Before Tuning Confusion Matrix
# For every fold: load the saved model, predict on the test split, then print the
# confusion matrix (rows = true class, columns = predicted class) and the classification report.
prev_fold = 0
class_names = ["BENIGN", "Syn", "UDP", "UDPLag"]
cf_row_layout = "{:<10} {:<10} {:<10} {:<10} {:<10}"
for fold_of in range(total_fold):
  fold_model_name = "drive/MyDrive/Dataset/SavedModels/"+ classifiers_name[2] + " fold" + str(fold_of+1) + ".sav"
  # NOTE(review): pickle.load can execute arbitrary code - only load model files produced by this project.
  with open(fold_model_name, "rb") as model_file:
    fold_model = pickle.load(model_file)
  # keep predictions 1-D: a column vector makes inverse_transform emit a DataConversionWarning
  y_pred_fold = np.ravel(fold_model.predict(x_test))
  print("")
  print("Adaboost Before Tuning Confusion Matrix Fold " + str(fold_of+1))
  y_pred_fold = label_encoder.inverse_transform(y_pred_fold)
  fold_cf = confusion_matrix(y_test, y_pred_fold, labels=class_names)
  print (cf_row_layout.format('', *class_names))
  # one printed row per true class; iterating the matrix rows guarantees that every row
  # prints its own counts (the UDP row used to print fold_cf[1][2] instead of fold_cf[2][2])
  for class_name, cf_row in zip(class_names, fold_cf):
    print (cf_row_layout.format(class_name, *cf_row))
  print("")
  print("")
  print(classification_report(y_test, y_pred_fold, zero_division=1))


Adaboost Before Tuning Confusion Matrix Fold 1
           BENIGN     Syn        UDP        UDPLag    
BENIGN     404        169        308        45        
Syn        12         19330      326        62        
UDP        16         687        326        0         
UDPLag     20         9          439        5         




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.89      0.44      0.59       926
         Syn       0.96      0.98      0.97     19730
         UDP       0.75      0.82      0.78      3871
      UDPLag       0.04      0.01      0.02       473

    accuracy                           0.92     25000
   macro avg       0.66      0.56      0.59     25000
weighted avg       0.91      0.92      0.91     25000


Adaboost Before Tuning Confusion Matrix Fold 2
           BENIGN     Syn        UDP        UDPLag    
BENIGN     477        171        231        47        
Syn        9          19379      270        72        
UDP        7          755        270        1         
UDPLag     19         9          438        7         




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.93      0.52      0.66       926
         Syn       0.95      0.98      0.97     19730
         UDP       0.77      0.80      0.79      3871
      UDPLag       0.06      0.01      0.02       473

    accuracy                           0.92     25000
   macro avg       0.68      0.58      0.61     25000
weighted avg       0.91      0.92      0.91     25000


Adaboost Before Tuning Confusion Matrix Fold 3
           BENIGN     Syn        UDP        UDPLag    
BENIGN     409        253        209        55        
Syn        3          19439      198        90        
UDP        8          802        198        1832      
UDPLag     19         15         12         427       




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.93      0.44      0.60       926
         Syn       0.95      0.99      0.97     19730
         UDP       0.75      0.32      0.45      3871
      UDPLag       0.18      0.90      0.30       473

    accuracy                           0.86     25000
   macro avg       0.70      0.66      0.58     25000
weighted avg       0.90      0.86      0.86     25000


Adaboost Before Tuning Confusion Matrix Fold 4
           BENIGN     Syn        UDP        UDPLag    
BENIGN     523        143        219        41        
Syn        338        19025      293        74        
UDP        64         705        293        0         
UDPLag     22         9          438        4         




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.55      0.56      0.56       926
         Syn       0.96      0.96      0.96     19730
         UDP       0.77      0.80      0.78      3871
      UDPLag       0.03      0.01      0.01       473

    accuracy                           0.91     25000
   macro avg       0.58      0.58      0.58     25000
weighted avg       0.89      0.91      0.90     25000


Adaboost Before Tuning Confusion Matrix Fold 5
           BENIGN     Syn        UDP        UDPLag    
BENIGN     413        241        233        39        
Syn        13         19396      263        58        
UDP        45         715        263        0         
UDPLag     20         13         438        2         




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.84      0.45      0.58       926
         Syn       0.95      0.98      0.97     19730
         UDP       0.77      0.80      0.79      3871
      UDPLag       0.02      0.00      0.01       473

    accuracy                           0.92     25000
   macro avg       0.65      0.56      0.59     25000
weighted avg       0.90      0.92      0.91     25000


Adaboost Before Tuning Confusion Matrix Fold 6
           BENIGN     Syn        UDP        UDPLag    
BENIGN     515        150        217        44        
Syn        81         19290      292        67        
UDP        9          796        292        0         
UDPLag     20         10         438        5         




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.82      0.56      0.66       926
         Syn       0.95      0.98      0.97     19730
         UDP       0.76      0.79      0.78      3871
      UDPLag       0.04      0.01      0.02       473

    accuracy                           0.92     25000
   macro avg       0.65      0.58      0.61     25000
weighted avg       0.90      0.92      0.91     25000


Adaboost Before Tuning Confusion Matrix Fold 7
           BENIGN     Syn        UDP        UDPLag    
BENIGN     547        145        191        43        
Syn        414        19027      191        98        
UDP        1899       740        191        0         
UDPLag     449        10         11         3         




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.17      0.59      0.26       926
         Syn       0.96      0.96      0.96     19730
         UDP       0.76      0.32      0.45      3871
      UDPLag       0.02      0.01      0.01       473

    accuracy                           0.83     25000
   macro avg       0.47      0.47      0.42     25000
weighted avg       0.88      0.83      0.84     25000


Adaboost Before Tuning Confusion Matrix Fold 8
           BENIGN     Syn        UDP        UDPLag    
BENIGN     516        155        207        48        
Syn        45         19328      279        78        
UDP        67         729        279        0         
UDPLag     21         10         437        5         




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.80      0.56      0.66       926
         Syn       0.96      0.98      0.97     19730
         UDP       0.77      0.79      0.78      3871
      UDPLag       0.04      0.01      0.02       473

    accuracy                           0.92     25000
   macro avg       0.64      0.59      0.61     25000
weighted avg       0.90      0.92      0.91     25000


Adaboost Before Tuning Confusion Matrix Fold 9
           BENIGN     Syn        UDP        UDPLag    
BENIGN     378        274        236        38        
Syn        2          19412      259        57        
UDP        7          775        259        0         
UDPLag     18         15         438        2         




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.93      0.41      0.57       926
         Syn       0.95      0.98      0.97     19730
         UDP       0.77      0.80      0.78      3871
      UDPLag       0.02      0.00      0.01       473

    accuracy                           0.92     25000
   macro avg       0.67      0.55      0.58     25000
weighted avg       0.90      0.92      0.90     25000


Adaboost Before Tuning Confusion Matrix Fold 10
           BENIGN     Syn        UDP        UDPLag    
BENIGN     482        160        242        42        
Syn        6          19358      285        81        
UDP        29         686        285        0         
UDPLag     21         8          438        6         




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.90      0.52      0.66       926
         Syn       0.96      0.98      0.97     19730
         UDP       0.77      0.82      0.79      3871
      UDPLag       0.05      0.01      0.02       473

    accuracy                           0.92     25000
   macro avg       0.67      0.58      0.61     25000
weighted avg       0.91      0.92      0.91     25000



In [None]:
#Adaboost Folds After Tuning Confusion Matrix
# For every fold: load the saved tuned model, predict on the test split, then print the
# confusion matrix (rows = true class, columns = predicted class) and the classification report.
prev_fold = 0
class_names = ["BENIGN", "Syn", "UDP", "UDPLag"]
cf_row_layout = "{:<10} {:<10} {:<10} {:<10} {:<10}"
for fold_of in range(total_fold):
  fold_model_name = "drive/MyDrive/Dataset/SavedModels/"+ classifiers_name[3] + " fold" + str(fold_of+1) + ".sav"
  # NOTE(review): pickle.load can execute arbitrary code - only load model files produced by this project.
  with open(fold_model_name, "rb") as model_file:
    fold_model = pickle.load(model_file)
  # keep predictions 1-D: a column vector makes inverse_transform emit a DataConversionWarning
  y_pred_fold = np.ravel(fold_model.predict(x_test))
  print("")
  print("Adaboost After Tuning Confusion Matrix Fold " + str(fold_of+1))
  y_pred_fold = label_encoder.inverse_transform(y_pred_fold)
  fold_cf = confusion_matrix(y_test, y_pred_fold, labels=class_names)
  print (cf_row_layout.format('', *class_names))
  # one printed row per true class; iterating the matrix rows guarantees that every row
  # prints its own counts (the UDP row used to print fold_cf[1][2] instead of fold_cf[2][2])
  for class_name, cf_row in zip(class_names, fold_cf):
    print (cf_row_layout.format(class_name, *cf_row))
  print("")
  print("")
  print(classification_report(y_test, y_pred_fold, zero_division=1))


Adaboost After Tuning Confusion Matrix Fold 1
           BENIGN     Syn        UDP        UDPLag    
BENIGN     690        216        9          11        
Syn        780        18810      140        0         
UDP        17         3          140        1         
UDPLag     34         9          429        1         




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.45      0.75      0.56       926
         Syn       0.99      0.95      0.97     19730
         UDP       0.87      0.99      0.93      3871
      UDPLag       0.08      0.00      0.00       473

    accuracy                           0.93     25000
   macro avg       0.60      0.67      0.62     25000
weighted avg       0.93      0.93      0.93     25000


Adaboost After Tuning Confusion Matrix Fold 2
           BENIGN     Syn        UDP        UDPLag    
BENIGN     493        372        36         25        
Syn        3          19587      136        4         
UDP        10         5          136        2         
UDPLag     15         20         430        8         




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.95      0.53      0.68       926
         Syn       0.98      0.99      0.99     19730
         UDP       0.86      1.00      0.93      3871
      UDPLag       0.21      0.02      0.03       473

    accuracy                           0.96     25000
   macro avg       0.75      0.63      0.66     25000
weighted avg       0.95      0.96      0.95     25000


Adaboost After Tuning Confusion Matrix Fold 3
           BENIGN     Syn        UDP        UDPLag    
BENIGN     655        229        34         8         
Syn        49         19541      138        2         
UDP        70         3          138        0         
UDPLag     29         12         423        9         




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.82      0.71      0.76       926
         Syn       0.99      0.99      0.99     19730
         UDP       0.86      0.98      0.92      3871
      UDPLag       0.47      0.02      0.04       473

    accuracy                           0.96     25000
   macro avg       0.79      0.67      0.68     25000
weighted avg       0.95      0.96      0.95     25000


Adaboost After Tuning Confusion Matrix Fold 4
           BENIGN     Syn        UDP        UDPLag    
BENIGN     596        303        26         1         
Syn        12         19579      139        0         
UDP        15         5          139        0         
UDPLag     32         13         428        0         




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.91      0.64      0.75       926
         Syn       0.98      0.99      0.99     19730
         UDP       0.87      0.99      0.93      3871
      UDPLag       0.00      0.00      0.00       473

    accuracy                           0.96     25000
   macro avg       0.69      0.66      0.67     25000
weighted avg       0.94      0.96      0.95     25000


Adaboost After Tuning Confusion Matrix Fold 5
           BENIGN     Syn        UDP        UDPLag    
BENIGN     628        247        25         26        
Syn        30         19551      126        23        
UDP        17         2          126        1231      
UDPLag     32         12         71         358       




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.89      0.68      0.77       926
         Syn       0.99      0.99      0.99     19730
         UDP       0.92      0.68      0.78      3871
      UDPLag       0.22      0.76      0.34       473

    accuracy                           0.93     25000
   macro avg       0.75      0.78      0.72     25000
weighted avg       0.96      0.93      0.94     25000


Adaboost After Tuning Confusion Matrix Fold 6
           BENIGN     Syn        UDP        UDPLag    
BENIGN     487        364        61         14        
Syn        0          19590      137        3         
UDP        4          5          137        8         
UDPLag     3          16         440        14        




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.99      0.53      0.69       926
         Syn       0.98      0.99      0.99     19730
         UDP       0.86      1.00      0.92      3871
      UDPLag       0.36      0.03      0.05       473

    accuracy                           0.96     25000
   macro avg       0.80      0.64      0.66     25000
weighted avg       0.95      0.96      0.95     25000


Adaboost After Tuning Confusion Matrix Fold 7
           BENIGN     Syn        UDP        UDPLag    
BENIGN     569        343        11         3         
Syn        10         19584      131        5         
UDP        16         4          131        17        
UDPLag     32         13         413        15        




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.91      0.61      0.73       926
         Syn       0.98      0.99      0.99     19730
         UDP       0.87      0.99      0.93      3871
      UDPLag       0.38      0.03      0.06       473

    accuracy                           0.96     25000
   macro avg       0.78      0.66      0.68     25000
weighted avg       0.95      0.96      0.95     25000


Adaboost After Tuning Confusion Matrix Fold 8
           BENIGN     Syn        UDP        UDPLag    
BENIGN     491        403        22         10        
Syn        6          19586      125        13        
UDP        7          20         125        1592      
UDPLag     23         19         236        195       




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.93      0.53      0.68       926
         Syn       0.98      0.99      0.99     19730
         UDP       0.85      0.58      0.69      3871
      UDPLag       0.11      0.41      0.17       473

    accuracy                           0.90     25000
   macro avg       0.72      0.63      0.63     25000
weighted avg       0.94      0.90      0.91     25000


Adaboost After Tuning Confusion Matrix Fold 9
           BENIGN     Syn        UDP        UDPLag    
BENIGN     587        300        37         2         
Syn        14         19573      135        8         
UDP        6          5          135        56        
UDPLag     17         16         390        50        




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.94      0.63      0.76       926
         Syn       0.98      0.99      0.99     19730
         UDP       0.87      0.98      0.92      3871
      UDPLag       0.43      0.11      0.17       473

    accuracy                           0.96     25000
   macro avg       0.81      0.68      0.71     25000
weighted avg       0.95      0.96      0.95     25000


Adaboost After Tuning Confusion Matrix Fold 10
           BENIGN     Syn        UDP        UDPLag    
BENIGN     523        357        26         20        
Syn        60         19533      125        12        
UDP        17         3          125        837       
UDPLag     32         12         176        253       




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.83      0.56      0.67       926
         Syn       0.98      0.99      0.99     19730
         UDP       0.90      0.78      0.84      3871
      UDPLag       0.23      0.53      0.32       473

    accuracy                           0.93     25000
   macro avg       0.73      0.72      0.70     25000
weighted avg       0.95      0.93      0.94     25000



In [None]:
#Stacking Folds Before Tuning Confusion Matrix
# For every fold: load the saved stacking model, predict on the test split, then print the
# confusion matrix (rows = true class, columns = predicted class) and the classification report.
prev_fold = 0
class_names = ["BENIGN", "Syn", "UDP", "UDPLag"]
cf_row_layout = "{:<10} {:<10} {:<10} {:<10} {:<10}"
for fold_of in range(total_fold):
  fold_model_name = "drive/MyDrive/Dataset/SavedModels/"+ classifiers_name[4] + " fold" + str(fold_of+1) + ".sav"
  # NOTE(review): pickle.load can execute arbitrary code - only load model files produced by this project.
  with open(fold_model_name, "rb") as model_file:
    fold_model = pickle.load(model_file)
  # keep predictions 1-D: a column vector makes inverse_transform emit a DataConversionWarning
  y_pred_fold = np.ravel(fold_model.predict(x_test))
  print("")
  print("Stacking Before Tuning Confusion Matrix Fold " + str(fold_of+1))
  y_pred_fold = label_encoder.inverse_transform(y_pred_fold)
  fold_cf = confusion_matrix(y_test, y_pred_fold, labels=class_names)
  print (cf_row_layout.format('', *class_names))
  # one printed row per true class; iterating the matrix rows guarantees that every row
  # prints its own counts (the UDP row used to print fold_cf[1][2] instead of fold_cf[2][2])
  for class_name, cf_row in zip(class_names, fold_cf):
    print (cf_row_layout.format(class_name, *cf_row))
  print("")
  print("")
  print(classification_report(y_test, y_pred_fold, zero_division=1))


Stacking Before Tuning Confusion Matrix Fold 1
           BENIGN     Syn        UDP        UDPLag    
BENIGN     373        236        315        2         
Syn        20         19336      312        62        
UDP        7          529        312        0         
UDPLag     20         12         440        1         




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.89      0.40      0.55       926
         Syn       0.96      0.98      0.97     19730
         UDP       0.76      0.86      0.81      3871
      UDPLag       0.02      0.00      0.00       473

    accuracy                           0.92     25000
   macro avg       0.66      0.56      0.58     25000
weighted avg       0.91      0.92      0.91     25000


Stacking Before Tuning Confusion Matrix Fold 2
           BENIGN     Syn        UDP        UDPLag    
BENIGN     428        268        221        9         
Syn        196        19402      117        15        
UDP        10         636        117        1         
UDPLag     21         11         437        4         




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.65      0.46      0.54       926
         Syn       0.95      0.98      0.97     19730
         UDP       0.81      0.83      0.82      3871
      UDPLag       0.14      0.01      0.02       473

    accuracy                           0.92     25000
   macro avg       0.64      0.57      0.59     25000
weighted avg       0.91      0.92      0.91     25000


Stacking Before Tuning Confusion Matrix Fold 3
           BENIGN     Syn        UDP        UDPLag    
BENIGN     425        206        253        42        
Syn        86         19467      112        65        
UDP        10         559        112        0         
UDPLag     20         14         437        2         




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.79      0.46      0.58       926
         Syn       0.96      0.99      0.97     19730
         UDP       0.80      0.85      0.83      3871
      UDPLag       0.02      0.00      0.01       473

    accuracy                           0.93     25000
   macro avg       0.64      0.58      0.60     25000
weighted avg       0.91      0.93      0.92     25000


Stacking Before Tuning Confusion Matrix Fold 4
           BENIGN     Syn        UDP        UDPLag    
BENIGN     321        374        172        59        
Syn        4          19548      100        78        
UDP        6          809        100        1840      
UDPLag     19         15         10         429       




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.92      0.35      0.50       926
         Syn       0.94      0.99      0.97     19730
         UDP       0.81      0.31      0.45      3871
      UDPLag       0.18      0.91      0.30       473

    accuracy                           0.86     25000
   macro avg       0.71      0.64      0.56     25000
weighted avg       0.91      0.86      0.86     25000


Stacking Before Tuning Confusion Matrix Fold 5
           BENIGN     Syn        UDP        UDPLag    
BENIGN     452        198        239        37        
Syn        4          19515      144        67        
UDP        9          603        144        0         
UDPLag     20         13         438        2         




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.93      0.49      0.64       926
         Syn       0.96      0.99      0.97     19730
         UDP       0.80      0.84      0.82      3871
      UDPLag       0.02      0.00      0.01       473

    accuracy                           0.93     25000
   macro avg       0.68      0.58      0.61     25000
weighted avg       0.92      0.93      0.92     25000


Stacking Before Tuning Confusion Matrix Fold 6
           BENIGN     Syn        UDP        UDPLag    
BENIGN     405        274        206        41        
Syn        44         19502      117        67        
UDP        10         508        117        0         
UDPLag     20         12         437        4         




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.85      0.44      0.58       926
         Syn       0.96      0.99      0.97     19730
         UDP       0.82      0.87      0.84      3871
      UDPLag       0.04      0.01      0.01       473

    accuracy                           0.93     25000
   macro avg       0.66      0.58      0.60     25000
weighted avg       0.92      0.93      0.92     25000


Stacking Before Tuning Confusion Matrix Fold 7
           BENIGN     Syn        UDP        UDPLag    
BENIGN     407        189        227        103       
Syn        78         19418      169        65        
UDP        48         589        169        1         
UDPLag     20         12         438        3         




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.74      0.44      0.55       926
         Syn       0.96      0.98      0.97     19730
         UDP       0.79      0.84      0.81      3871
      UDPLag       0.02      0.01      0.01       473

    accuracy                           0.92     25000
   macro avg       0.63      0.57      0.59     25000
weighted avg       0.91      0.92      0.91     25000


Stacking Before Tuning Confusion Matrix Fold 8
           BENIGN     Syn        UDP        UDPLag    
BENIGN     552        259        115        0         
Syn        395        19175      96         64        
UDP        1851       993        96         0         
UDPLag     450        13         10         0         




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.17      0.60      0.26       926
         Syn       0.94      0.97      0.95     19730
         UDP       0.82      0.27      0.40      3871
      UDPLag       0.00      0.00      0.00       473

    accuracy                           0.83     25000
   macro avg       0.48      0.46      0.41     25000
weighted avg       0.87      0.83      0.83     25000


Stacking Before Tuning Confusion Matrix Fold 9
           BENIGN     Syn        UDP        UDPLag    
BENIGN     379        266        274        7         
Syn        6          19537      119        68        
UDP        9          1129       119        0         
UDPLag     20         14         439        0         




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.92      0.41      0.57       926
         Syn       0.93      0.99      0.96     19730
         UDP       0.77      0.71      0.74      3871
      UDPLag       0.00      0.00      0.00       473

    accuracy                           0.91     25000
   macro avg       0.65      0.53      0.57     25000
weighted avg       0.89      0.91      0.89     25000


Stacking Before Tuning Confusion Matrix Fold 10
           BENIGN     Syn        UDP        UDPLag    
BENIGN     607        160        159        0         
Syn        5220       14330      113        67        
UDP        49         601        113        0         
UDPLag     26         12         435        0         




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.10      0.66      0.18       926
         Syn       0.95      0.73      0.82     19730
         UDP       0.82      0.83      0.83      3871
      UDPLag       0.00      0.00      0.00       473

    accuracy                           0.73     25000
   macro avg       0.47      0.55      0.46     25000
weighted avg       0.88      0.73      0.78     25000



In [None]:

#Stacking Folds After Tuning Confusion Matrix
# For every saved fold model of classifiers_name[5] (presumably the tuned stacking
# classifier -- confirm against the classifiers_name definition), predict on the
# test set and print a confusion matrix plus a classification report.
cm_labels = ["BENIGN", "Syn", "UDP", "UDPLag"]
row_fmt = "{:<10} {:<10} {:<10} {:<10} {:<10}"
for fold_of in range(total_fold):
  fold_model_name = "drive/MyDrive/Dataset/SavedModels/"+ classifiers_name[5] + " fold" + str(fold_of+1) + ".sav"
  # NOTE(security): pickle.load can execute arbitrary code; only load model files written by this notebook.
  with open(fold_model_name, "rb") as model_file:
    fold_model = pickle.load(model_file)
  # Keep predictions 1-D: passing a (-1, 1) column to LabelEncoder.inverse_transform
  # only produces a column_or_1d DataConversionWarning on every fold.
  y_pred_fold = np.ravel(fold_model.predict(x_test))
  print("")
  print("Stacking After Tuning Confusion Matrix Fold " + str(fold_of+1))
  y_pred_fold = label_encoder.inverse_transform(y_pred_fold)
  # sklearn convention: rows are true classes, columns are predicted classes.
  fold_cf = confusion_matrix(y_test, y_pred_fold, labels=cm_labels)
  print(row_fmt.format('', *cm_labels))
  # Print each row from its own matrix row; the hand-written version printed
  # fold_cf[1][2] (a Syn-row value) in the UDP/UDP cell.
  for row_label, row_counts in zip(cm_labels, fold_cf):
    print(row_fmt.format(row_label, *row_counts))
  print("")
  print("")
  print(classification_report(y_test, y_pred_fold, zero_division=1))


Stacking After Tuning Confusion Matrix Fold 1
           BENIGN     Syn        UDP        UDPLag    
BENIGN     777        148        1          0         
Syn        0          19718      12         0         
UDP        5          13         12         0         
UDPLag     16         27         430        0         




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.97      0.84      0.90       926
         Syn       0.99      1.00      0.99     19730
         UDP       0.90      1.00      0.94      3871
      UDPLag       1.00      0.00      0.00       473

    accuracy                           0.97     25000
   macro avg       0.97      0.71      0.71     25000
weighted avg       0.98      0.97      0.96     25000


Stacking After Tuning Confusion Matrix Fold 2


  y = column_or_1d(y, warn=True)


           BENIGN     Syn        UDP        UDPLag    
BENIGN     727        198        1          0         
Syn        0          19718      12         0         
UDP        5          13         12         0         
UDPLag     16         27         430        0         


              precision    recall  f1-score   support

      BENIGN       0.97      0.79      0.87       926
         Syn       0.99      1.00      0.99     19730
         UDP       0.90      1.00      0.94      3871
      UDPLag       1.00      0.00      0.00       473

    accuracy                           0.97     25000
   macro avg       0.96      0.69      0.70     25000
weighted avg       0.97      0.97      0.96     25000


Stacking After Tuning Confusion Matrix Fold 3


  y = column_or_1d(y, warn=True)


           BENIGN     Syn        UDP        UDPLag    
BENIGN     495        430        1          0         
Syn        0          19718      12         0         
UDP        5          13         12         0         
UDPLag     16         28         429        0         


              precision    recall  f1-score   support

      BENIGN       0.96      0.53      0.69       926
         Syn       0.98      1.00      0.99     19730
         UDP       0.90      1.00      0.94      3871
      UDPLag       1.00      0.00      0.00       473

    accuracy                           0.96     25000
   macro avg       0.96      0.63      0.65     25000
weighted avg       0.96      0.96      0.95     25000


Stacking After Tuning Confusion Matrix Fold 4


  y = column_or_1d(y, warn=True)


           BENIGN     Syn        UDP        UDPLag    
BENIGN     751        174        1          0         
Syn        0          19718      12         0         
UDP        5          13         12         0         
UDPLag     16         28         429        0         


              precision    recall  f1-score   support

      BENIGN       0.97      0.81      0.88       926
         Syn       0.99      1.00      0.99     19730
         UDP       0.90      1.00      0.94      3871
      UDPLag       1.00      0.00      0.00       473

    accuracy                           0.97     25000
   macro avg       0.96      0.70      0.71     25000
weighted avg       0.97      0.97      0.96     25000


Stacking After Tuning Confusion Matrix Fold 5


  y = column_or_1d(y, warn=True)


           BENIGN     Syn        UDP        UDPLag    
BENIGN     601        321        4          0         
Syn        1          19717      12         0         
UDP        6          12         12         0         
UDPLag     16         28         429        0         


              precision    recall  f1-score   support

      BENIGN       0.96      0.65      0.78       926
         Syn       0.98      1.00      0.99     19730
         UDP       0.90      1.00      0.94      3871
      UDPLag       1.00      0.00      0.00       473

    accuracy                           0.97     25000
   macro avg       0.96      0.66      0.68     25000
weighted avg       0.97      0.97      0.96     25000


Stacking After Tuning Confusion Matrix Fold 6
           BENIGN     Syn        UDP        UDPLag    
BENIGN     827        98         1          0         
Syn        0          19718      12         0         
UDP        5          13         12         0         
UDPLag     16        

  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.98      0.89      0.93       926
         Syn       0.99      1.00      1.00     19730
         UDP       0.90      1.00      0.94      3871
      UDPLag       1.00      0.00      0.00       473

    accuracy                           0.98     25000
   macro avg       0.97      0.72      0.72     25000
weighted avg       0.98      0.98      0.97     25000


Stacking After Tuning Confusion Matrix Fold 7


  y = column_or_1d(y, warn=True)


           BENIGN     Syn        UDP        UDPLag    
BENIGN     856        70         0          0         
Syn        0          19718      12         0         
UDP        7          11         12         0         
UDPLag     22         22         429        0         


              precision    recall  f1-score   support

      BENIGN       0.97      0.92      0.95       926
         Syn       0.99      1.00      1.00     19730
         UDP       0.90      1.00      0.94      3871
      UDPLag       1.00      0.00      0.00       473

    accuracy                           0.98     25000
   macro avg       0.96      0.73      0.72     25000
weighted avg       0.98      0.98      0.97     25000


Stacking After Tuning Confusion Matrix Fold 8


  y = column_or_1d(y, warn=True)


           BENIGN     Syn        UDP        UDPLag    
BENIGN     842        83         1          0         
Syn        0          19718      12         0         
UDP        5          13         12         0         
UDPLag     16         28         429        0         


              precision    recall  f1-score   support

      BENIGN       0.98      0.91      0.94       926
         Syn       0.99      1.00      1.00     19730
         UDP       0.90      1.00      0.94      3871
      UDPLag       1.00      0.00      0.00       473

    accuracy                           0.98     25000
   macro avg       0.97      0.73      0.72     25000
weighted avg       0.98      0.98      0.97     25000


Stacking After Tuning Confusion Matrix Fold 9


  y = column_or_1d(y, warn=True)


           BENIGN     Syn        UDP        UDPLag    
BENIGN     810        115        1          0         
Syn        1          19717      12         0         
UDP        6          12         12         0         
UDPLag     16         27         430        0         


              precision    recall  f1-score   support

      BENIGN       0.97      0.87      0.92       926
         Syn       0.99      1.00      1.00     19730
         UDP       0.90      1.00      0.94      3871
      UDPLag       1.00      0.00      0.00       473

    accuracy                           0.98     25000
   macro avg       0.97      0.72      0.72     25000
weighted avg       0.98      0.98      0.97     25000


Stacking After Tuning Confusion Matrix Fold 10


  y = column_or_1d(y, warn=True)


           BENIGN     Syn        UDP        UDPLag    
BENIGN     833        92         1          0         
Syn        0          19718      12         0         
UDP        5          13         12         0         
UDPLag     17         27         429        0         


              precision    recall  f1-score   support

      BENIGN       0.97      0.90      0.94       926
         Syn       0.99      1.00      1.00     19730
         UDP       0.90      1.00      0.94      3871
      UDPLag       1.00      0.00      0.00       473

    accuracy                           0.98     25000
   macro avg       0.97      0.72      0.72     25000
weighted avg       0.98      0.98      0.97     25000



In [None]:
#Random Forest Folds Before Tuning Confusion Matrix
# For every saved fold model of classifiers_name[0] (presumably the untuned random
# forest -- confirm against the classifiers_name definition), predict on the test
# set and print a confusion matrix plus a classification report.
cm_labels = ["BENIGN", "Syn", "UDP", "UDPLag"]
row_fmt = "{:<10} {:<10} {:<10} {:<10} {:<10}"
for fold_of in range(total_fold):
  fold_model_name = "drive/MyDrive/Dataset/SavedModels/"+ classifiers_name[0] + " fold" + str(fold_of+1) + ".sav"
  # NOTE(security): pickle.load can execute arbitrary code; only load model files written by this notebook.
  with open(fold_model_name, "rb") as model_file:
    fold_model = pickle.load(model_file)
  # Keep predictions 1-D: passing a (-1, 1) column to LabelEncoder.inverse_transform
  # only produces a column_or_1d DataConversionWarning on every fold.
  y_pred_fold = np.ravel(fold_model.predict(x_test))
  print("")
  print("Random Forest Before Tuning Confusion Matrix Fold " + str(fold_of+1))
  y_pred_fold = label_encoder.inverse_transform(y_pred_fold)
  # sklearn convention: rows are true classes, columns are predicted classes.
  fold_cf = confusion_matrix(y_test, y_pred_fold, labels=cm_labels)
  print(row_fmt.format('', *cm_labels))
  # Print each row from its own matrix row; the hand-written version printed
  # fold_cf[1][2] (a Syn-row value) in the UDP/UDP cell.
  for row_label, row_counts in zip(cm_labels, fold_cf):
    print(row_fmt.format(row_label, *row_counts))
  print("")
  print("")
  print(classification_report(y_test, y_pred_fold, zero_division=1))


Random Forest Before Tuning Confusion Matrix Fold 1
           BENIGN     Syn        UDP        UDPLag    
BENIGN     988        0          0          0         
Syn        1408       18321      0          0         
UDP        20         0          0          0         
UDPLag     38         0          9          414       




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.40      1.00      0.57       988
         Syn       1.00      0.93      0.96     19729
         UDP       1.00      0.99      1.00      3822
      UDPLag       1.00      0.90      0.95       461

    accuracy                           0.94     25000
   macro avg       0.85      0.96      0.87     25000
weighted avg       0.98      0.94      0.95     25000


Random Forest Before Tuning Confusion Matrix Fold 2
           BENIGN     Syn        UDP        UDPLag    
BENIGN     988        0          0          0         
Syn        15         19699      6          9         
UDP        20         0          6          0         
UDPLag     38         0          34         389       




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.93      1.00      0.96       988
         Syn       1.00      1.00      1.00     19729
         UDP       0.99      0.99      0.99      3822
      UDPLag       0.98      0.84      0.91       461

    accuracy                           1.00     25000
   macro avg       0.97      0.96      0.97     25000
weighted avg       1.00      1.00      1.00     25000


Random Forest Before Tuning Confusion Matrix Fold 3


  y = column_or_1d(y, warn=True)


           BENIGN     Syn        UDP        UDPLag    
BENIGN     988        0          0          0         
Syn        926        18803      0          0         
UDP        20         0          0          0         
UDPLag     38         0          0          423       


              precision    recall  f1-score   support

      BENIGN       0.50      1.00      0.67       988
         Syn       1.00      0.95      0.98     19729
         UDP       1.00      0.99      1.00      3822
      UDPLag       1.00      0.92      0.96       461

    accuracy                           0.96     25000
   macro avg       0.88      0.97      0.90     25000
weighted avg       0.98      0.96      0.97     25000


Random Forest Before Tuning Confusion Matrix Fold 4
           BENIGN     Syn        UDP        UDPLag    
BENIGN     985        0          3          0         
Syn        2          19712      15         0         
UDP        18         2          15         0         
UDPLag     34  

  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.95      1.00      0.97       988
         Syn       1.00      1.00      1.00     19729
         UDP       0.98      0.99      0.99      3822
      UDPLag       1.00      0.78      0.88       461

    accuracy                           0.99     25000
   macro avg       0.98      0.94      0.96     25000
weighted avg       0.99      0.99      0.99     25000


Random Forest Before Tuning Confusion Matrix Fold 5


  y = column_or_1d(y, warn=True)


           BENIGN     Syn        UDP        UDPLag    
BENIGN     988        0          0          0         
Syn        49         19668      11         1         
UDP        17         2          11         0         
UDPLag     38         0          5          418       


              precision    recall  f1-score   support

      BENIGN       0.90      1.00      0.95       988
         Syn       1.00      1.00      1.00     19729
         UDP       1.00      1.00      1.00      3822
      UDPLag       1.00      0.91      0.95       461

    accuracy                           1.00     25000
   macro avg       0.97      0.97      0.97     25000
weighted avg       1.00      1.00      1.00     25000


Random Forest Before Tuning Confusion Matrix Fold 6
           BENIGN     Syn        UDP        UDPLag    
BENIGN     988        0          0          0         
Syn        889        18825      15         0         
UDP        20         0          15         0         
UDPLag     38  

  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.51      1.00      0.68       988
         Syn       1.00      0.95      0.98     19729
         UDP       0.98      0.99      0.99      3822
      UDPLag       1.00      0.78      0.88       461

    accuracy                           0.96     25000
   macro avg       0.87      0.93      0.88     25000
weighted avg       0.98      0.96      0.96     25000


Random Forest Before Tuning Confusion Matrix Fold 7
           BENIGN     Syn        UDP        UDPLag    
BENIGN     986        0          2          0         
Syn        959        18770      0          0         
UDP        20         0          0          0         
UDPLag     38         0          0          423       




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.49      1.00      0.66       988
         Syn       1.00      0.95      0.98     19729
         UDP       1.00      0.99      1.00      3822
      UDPLag       1.00      0.92      0.96       461

    accuracy                           0.96     25000
   macro avg       0.87      0.97      0.90     25000
weighted avg       0.98      0.96      0.97     25000


Random Forest Before Tuning Confusion Matrix Fold 8
           BENIGN     Syn        UDP        UDPLag    
BENIGN     988        0          0          0         
Syn        1408       18321      0          0         
UDP        20         0          0          0         
UDPLag     38         0          107        316       




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.40      1.00      0.57       988
         Syn       1.00      0.93      0.96     19729
         UDP       0.97      0.99      0.98      3822
      UDPLag       1.00      0.69      0.81       461

    accuracy                           0.94     25000
   macro avg       0.84      0.90      0.83     25000
weighted avg       0.97      0.94      0.95     25000


Random Forest Before Tuning Confusion Matrix Fold 9
           BENIGN     Syn        UDP        UDPLag    
BENIGN     988        0          0          0         
Syn        934        18795      0          0         
UDP        20         0          0          0         
UDPLag     38         0          34         389       




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.50      1.00      0.67       988
         Syn       1.00      0.95      0.98     19729
         UDP       0.99      0.99      0.99      3822
      UDPLag       1.00      0.84      0.92       461

    accuracy                           0.96     25000
   macro avg       0.87      0.95      0.89     25000
weighted avg       0.98      0.96      0.97     25000


Random Forest Before Tuning Confusion Matrix Fold 10


  y = column_or_1d(y, warn=True)


           BENIGN     Syn        UDP        UDPLag    
BENIGN     988        0          0          0         
Syn        941        18773      15         0         
UDP        20         0          15         0         
UDPLag     38         0          33         390       


              precision    recall  f1-score   support

      BENIGN       0.50      1.00      0.66       988
         Syn       1.00      0.95      0.98     19729
         UDP       0.99      0.99      0.99      3822
      UDPLag       1.00      0.85      0.92       461

    accuracy                           0.96     25000
   macro avg       0.87      0.95      0.89     25000
weighted avg       0.98      0.96      0.96     25000



In [None]:
#Random Forest Folds After Tuning Confusion Matrix
# For every saved fold model of classifiers_name[1] (presumably the tuned random
# forest -- confirm against the classifiers_name definition), predict on the test
# set and print a confusion matrix plus a classification report.
cm_labels = ["BENIGN", "Syn", "UDP", "UDPLag"]
row_fmt = "{:<10} {:<10} {:<10} {:<10} {:<10}"
for fold_of in range(total_fold):
  fold_model_name = "drive/MyDrive/Dataset/SavedModels/"+ classifiers_name[1] + " fold" + str(fold_of+1) + ".sav"
  # NOTE(security): pickle.load can execute arbitrary code; only load model files written by this notebook.
  with open(fold_model_name, "rb") as model_file:
    fold_model = pickle.load(model_file)
  # Keep predictions 1-D: passing a (-1, 1) column to LabelEncoder.inverse_transform
  # only produces a column_or_1d DataConversionWarning on every fold.
  y_pred_fold = np.ravel(fold_model.predict(x_test))
  print("")
  print("Random Forest After Tuning Confusion Matrix Fold " + str(fold_of+1))
  y_pred_fold = label_encoder.inverse_transform(y_pred_fold)
  # sklearn convention: rows are true classes, columns are predicted classes.
  fold_cf = confusion_matrix(y_test, y_pred_fold, labels=cm_labels)
  print(row_fmt.format('', *cm_labels))
  # Print each row from its own matrix row; the hand-written version printed
  # fold_cf[1][2] (a Syn-row value) in the UDP/UDP cell.
  for row_label, row_counts in zip(cm_labels, fold_cf):
    print(row_fmt.format(row_label, *row_counts))
  print("")
  print("")
  print(classification_report(y_test, y_pred_fold, zero_division=1))


Random Forest After Tuning Confusion Matrix Fold 1
           BENIGN     Syn        UDP        UDPLag    
BENIGN     986        2          0          0         
Syn        0          19714      15         0         
UDP        17         2          15         0         
UDPLag     36         2          423        0         




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.95      1.00      0.97       988
         Syn       1.00      1.00      1.00     19729
         UDP       0.90      1.00      0.94      3822
      UDPLag       1.00      0.00      0.00       461

    accuracy                           0.98     25000
   macro avg       0.96      0.75      0.73     25000
weighted avg       0.98      0.98      0.97     25000


Random Forest After Tuning Confusion Matrix Fold 2
           BENIGN     Syn        UDP        UDPLag    
BENIGN     988        0          0          0         
Syn        2          19712      15         0         
UDP        17         2          15         0         
UDPLag     34         4          423        0         




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.95      1.00      0.97       988
         Syn       1.00      1.00      1.00     19729
         UDP       0.90      1.00      0.94      3822
      UDPLag       1.00      0.00      0.00       461

    accuracy                           0.98     25000
   macro avg       0.96      0.75      0.73     25000
weighted avg       0.98      0.98      0.97     25000


Random Forest After Tuning Confusion Matrix Fold 3
           BENIGN     Syn        UDP        UDPLag    
BENIGN     988        0          0          0         
Syn        1          19713      15         0         
UDP        17         2          15         0         
UDPLag     34         4          423        0         




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.95      1.00      0.97       988
         Syn       1.00      1.00      1.00     19729
         UDP       0.90      1.00      0.94      3822
      UDPLag       1.00      0.00      0.00       461

    accuracy                           0.98     25000
   macro avg       0.96      0.75      0.73     25000
weighted avg       0.98      0.98      0.97     25000


Random Forest After Tuning Confusion Matrix Fold 4


  y = column_or_1d(y, warn=True)


           BENIGN     Syn        UDP        UDPLag    
BENIGN     987        1          0          0         
Syn        0          19714      15         0         
UDP        10         10         15         0         
UDPLag     28         10         423        0         


              precision    recall  f1-score   support

      BENIGN       0.96      1.00      0.98       988
         Syn       1.00      1.00      1.00     19729
         UDP       0.90      0.99      0.94      3822
      UDPLag       1.00      0.00      0.00       461

    accuracy                           0.98     25000
   macro avg       0.96      0.75      0.73     25000
weighted avg       0.98      0.98      0.97     25000


Random Forest After Tuning Confusion Matrix Fold 5
           BENIGN     Syn        UDP        UDPLag    
BENIGN     988        0          0          0         
Syn        227        19487      15         0         
UDP        19         0          15         0         
UDPLag     38   

  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.78      1.00      0.87       988
         Syn       1.00      0.99      0.99     19729
         UDP       0.90      1.00      0.94      3822
      UDPLag       1.00      0.00      0.00       461

    accuracy                           0.97     25000
   macro avg       0.92      0.75      0.70     25000
weighted avg       0.98      0.97      0.96     25000


Random Forest After Tuning Confusion Matrix Fold 6
           BENIGN     Syn        UDP        UDPLag    
BENIGN     964        20         4          0         
Syn        1          19713      15         0         
UDP        17         2          15         0         
UDPLag     34         4          423        0         




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.95      0.98      0.96       988
         Syn       1.00      1.00      1.00     19729
         UDP       0.90      1.00      0.94      3822
      UDPLag       1.00      0.00      0.00       461

    accuracy                           0.98     25000
   macro avg       0.96      0.74      0.73     25000
weighted avg       0.98      0.98      0.97     25000


Random Forest After Tuning Confusion Matrix Fold 7
           BENIGN     Syn        UDP        UDPLag    
BENIGN     988        0          0          0         
Syn        102        19612      15         0         
UDP        17         2          15         0         
UDPLag     34         4          423        0         




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.87      1.00      0.93       988
         Syn       1.00      0.99      1.00     19729
         UDP       0.90      1.00      0.94      3822
      UDPLag       1.00      0.00      0.00       461

    accuracy                           0.98     25000
   macro avg       0.94      0.75      0.72     25000
weighted avg       0.98      0.98      0.97     25000


Random Forest After Tuning Confusion Matrix Fold 8


  y = column_or_1d(y, warn=True)


           BENIGN     Syn        UDP        UDPLag    
BENIGN     988        0          0          0         
Syn        2          19712      15         0         
UDP        18         2          15         0         
UDPLag     34         4          423        0         


              precision    recall  f1-score   support

      BENIGN       0.95      1.00      0.97       988
         Syn       1.00      1.00      1.00     19729
         UDP       0.90      0.99      0.94      3822
      UDPLag       1.00      0.00      0.00       461

    accuracy                           0.98     25000
   macro avg       0.96      0.75      0.73     25000
weighted avg       0.98      0.98      0.97     25000


Random Forest After Tuning Confusion Matrix Fold 9
           BENIGN     Syn        UDP        UDPLag    
BENIGN     988        0          0          0         
Syn        1          19713      15         0         
UDP        16         3          15         0         
UDPLag     33   

  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.95      1.00      0.98       988
         Syn       1.00      1.00      1.00     19729
         UDP       0.90      1.00      0.94      3822
      UDPLag       1.00      0.00      0.00       461

    accuracy                           0.98     25000
   macro avg       0.96      0.75      0.73     25000
weighted avg       0.98      0.98      0.97     25000


Random Forest After Tuning Confusion Matrix Fold 10
           BENIGN     Syn        UDP        UDPLag    
BENIGN     988        0          0          0         
Syn        1          19713      15         0         
UDP        17         2          15         0         
UDPLag     34         4          423        0         




  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.95      1.00      0.97       988
         Syn       1.00      1.00      1.00     19729
         UDP       0.90      1.00      0.94      3822
      UDPLag       1.00      0.00      0.00       461

    accuracy                           0.98     25000
   macro avg       0.96      0.75      0.73     25000
weighted avg       0.98      0.98      0.97     25000



In [None]:
#Adaboost before tuning model with best accuracy
# Select the fold whose AdaBoost (before tuning) model reached the highest accuracy.
# Folds are ranked against this classifier's own per-fold results only, and the
# overall maximum is taken (ties resolve to the earliest fold).
best_fold = max(range(total_fold), key=lambda fold_of: result_adaboost_beforetuning_data[fold_of]['accuracy'])
print("Adaboost Before Tuning best model is fold number " + str(best_fold+1) + " with accuracy of " + str(result_adaboost_beforetuning_data[best_fold]['accuracy']))

# Reload the winning fold's saved model and store a copy under the "best model" name.
# NOTE: pickle.load executes code embedded in the file; only load model files written by this notebook.
best_model_name = "drive/MyDrive/Dataset/SavedModels/"+ classifiers_name[2] + " fold" + str(best_fold+1) + ".sav"
with open(best_model_name, "rb") as saved_model_file:
  best_model = pickle.load(saved_model_file)
filename = "drive/MyDrive/Dataset/SavedModels/"+ classifiers_name[2] + " best model"+".sav"
with open(filename, 'wb') as best_model_file:
  pickle.dump(best_model, best_model_file)

# Predict on the test split. Predictions are flattened to 1-D because
# LabelEncoder.inverse_transform warns (DataConversionWarning) on column vectors.
y_pred_best_model = np.ravel(best_model.predict(x_test))
print("")
print("Confusion matrix")
y_pred_best_model = label_encoder.inverse_transform(y_pred_best_model)
class_names = ["BENIGN", "Syn", "UDP", "UDPLag"]
bmcf = confusion_matrix(y_test, y_pred_best_model, labels=class_names)   #best model confusion matrix
row_format = "{:<10} {:<10} {:<10} {:<10} {:<10}"
print (row_format.format('', *class_names))
# One printed row per true class; iterating the matrix rows keeps every cell
# (including the UDP/UDP diagonal) tied to its own row.
for class_name, matrix_row in zip(class_names, bmcf):
  print (row_format.format(class_name, *matrix_row))
print("")
print("")
print("")
print(classification_report(y_test, y_pred_best_model, zero_division=1))

Adaboost Before Tuning best model is fold number 1 with accuracy of 0.91628

Confusion matrix
           BENIGN     Syn        UDP        UDPLag    
BENIGN     404        169        308        45        
Syn        12         19330      326        62        
UDP        16         687        326        0         
UDPLag     20         9          439        5         





  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.89      0.44      0.59       926
         Syn       0.96      0.98      0.97     19730
         UDP       0.75      0.82      0.78      3871
      UDPLag       0.04      0.01      0.02       473

    accuracy                           0.92     25000
   macro avg       0.66      0.56      0.59     25000
weighted avg       0.91      0.92      0.91     25000



In [None]:
#Adaboost after tuning model with best accuracy
# Select the fold whose AdaBoost (after tuning) model reached the highest accuracy.
# Folds are ranked against this classifier's own per-fold results only, and the
# overall maximum is taken (ties resolve to the earliest fold).
best_fold = max(range(total_fold), key=lambda fold_of: result_adaboost_aftertuning_data[fold_of]['accuracy'])
print("Adaboost After Tuning best model is fold number " + str(best_fold+1) + " with accuracy of " + str(result_adaboost_aftertuning_data[best_fold]['accuracy']))

# Reload the winning fold's saved model and store a copy under the "best model" name.
# NOTE: pickle.load executes code embedded in the file; only load model files written by this notebook.
best_model_name = "drive/MyDrive/Dataset/SavedModels/"+ classifiers_name[3] + " fold" + str(best_fold+1) + ".sav"
with open(best_model_name, "rb") as saved_model_file:
  best_model = pickle.load(saved_model_file)
filename = "drive/MyDrive/Dataset/SavedModels/"+ classifiers_name[3] + " best model"+".sav"
with open(filename, 'wb') as best_model_file:
  pickle.dump(best_model, best_model_file)

# Predict on the test split. Predictions are flattened to 1-D because
# LabelEncoder.inverse_transform warns (DataConversionWarning) on column vectors.
y_pred_best_model = np.ravel(best_model.predict(x_test))
print("")
print("Confusion matrix")
y_pred_best_model = label_encoder.inverse_transform(y_pred_best_model)
class_names = ["BENIGN", "Syn", "UDP", "UDPLag"]
bmcf = confusion_matrix(y_test, y_pred_best_model, labels=class_names)   #best model confusion matrix
row_format = "{:<10} {:<10} {:<10} {:<10} {:<10}"
print (row_format.format('', *class_names))
# One printed row per true class; iterating the matrix rows keeps every cell
# (including the UDP/UDP diagonal) tied to its own row.
for class_name, matrix_row in zip(class_names, bmcf):
  print (row_format.format(class_name, *matrix_row))
print("")
print("")
print("")
print(classification_report(y_test, y_pred_best_model, zero_division=1))

Adaboost After Tuning best model is fold number 9 with accuracy of 0.96056

Confusion matrix
           BENIGN     Syn        UDP        UDPLag    
BENIGN     587        300        37         2         
Syn        14         19573      135        8         
UDP        6          5          135        56        
UDPLag     17         16         390        50        





  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.94      0.63      0.76       926
         Syn       0.98      0.99      0.99     19730
         UDP       0.87      0.98      0.92      3871
      UDPLag       0.43      0.11      0.17       473

    accuracy                           0.96     25000
   macro avg       0.81      0.68      0.71     25000
weighted avg       0.95      0.96      0.95     25000



In [None]:
#Stacking before tuning model with best accuracy
# Select the fold whose Stacking (before tuning) model reached the highest accuracy.
# Folds are ranked against this classifier's own per-fold results only, and the
# overall maximum is taken (ties resolve to the earliest fold).
best_fold = max(range(total_fold), key=lambda fold_of: result_stacking_beforetuning_data[fold_of]['accuracy'])
print("Stacking Before Tuning best model is fold number " + str(best_fold+1) + " with accuracy of " + str(result_stacking_beforetuning_data[best_fold]['accuracy']))

# Reload the winning fold's saved model and store a copy under the "best model" name.
# NOTE: pickle.load executes code embedded in the file; only load model files written by this notebook.
best_model_name = "drive/MyDrive/Dataset/SavedModels/"+ classifiers_name[4] + " fold" + str(best_fold+1) + ".sav"
with open(best_model_name, "rb") as saved_model_file:
  best_model = pickle.load(saved_model_file)
filename = "drive/MyDrive/Dataset/SavedModels/"+ classifiers_name[4] + " best model"+".sav"
with open(filename, 'wb') as best_model_file:
  pickle.dump(best_model, best_model_file)

# Predict on the test split. Predictions are flattened to 1-D because
# LabelEncoder.inverse_transform warns (DataConversionWarning) on column vectors.
y_pred_best_model = np.ravel(best_model.predict(x_test))
print("")
print("Confusion matrix")
y_pred_best_model = label_encoder.inverse_transform(y_pred_best_model)
class_names = ["BENIGN", "Syn", "UDP", "UDPLag"]
bmcf = confusion_matrix(y_test, y_pred_best_model, labels=class_names)   #best model confusion matrix
row_format = "{:<10} {:<10} {:<10} {:<10} {:<10}"
print (row_format.format('', *class_names))
# One printed row per true class; iterating the matrix rows keeps every cell
# (including the UDP/UDP diagonal) tied to its own row.
for class_name, matrix_row in zip(class_names, bmcf):
  print (row_format.format(class_name, *matrix_row))
print("")
print("")
print("")
print(classification_report(y_test, y_pred_best_model, zero_division=1))

Stacking Before Tuning best model is fold number 1 with accuracy of 0.9218

Confusion matrix


  y = column_or_1d(y, warn=True)


           BENIGN     Syn        UDP        UDPLag    
BENIGN     373        236        315        2         
Syn        20         19336      312        62        
UDP        7          529        312        0         
UDPLag     20         12         440        1         



              precision    recall  f1-score   support

      BENIGN       0.89      0.40      0.55       926
         Syn       0.96      0.98      0.97     19730
         UDP       0.76      0.86      0.81      3871
      UDPLag       0.02      0.00      0.00       473

    accuracy                           0.92     25000
   macro avg       0.66      0.56      0.58     25000
weighted avg       0.91      0.92      0.91     25000



In [None]:
#Stacking after tuning model with best accuracy
# Select the fold whose Stacking (after tuning) model reached the highest accuracy.
# Folds are ranked against this classifier's own per-fold results only, and the
# overall maximum is taken (ties resolve to the earliest fold).
best_fold = max(range(total_fold), key=lambda fold_of: result_stacking_aftertuning_data[fold_of]['accuracy'])
print("Stacking After Tuning best model is fold number " + str(best_fold+1) + " with accuracy of " + str(result_stacking_aftertuning_data[best_fold]['accuracy']))

# Reload the winning fold's saved model and store a copy under the "best model" name.
# NOTE: pickle.load executes code embedded in the file; only load model files written by this notebook.
best_model_name = "drive/MyDrive/Dataset/SavedModels/"+ classifiers_name[5] + " fold" + str(best_fold+1) + ".sav"
with open(best_model_name, "rb") as saved_model_file:
  best_model = pickle.load(saved_model_file)
filename = "drive/MyDrive/Dataset/SavedModels/"+ classifiers_name[5] + " best model"+".sav"
with open(filename, 'wb') as best_model_file:
  pickle.dump(best_model, best_model_file)

# Predict on the test split. Predictions are flattened to 1-D because
# LabelEncoder.inverse_transform warns (DataConversionWarning) on column vectors.
y_pred_best_model = np.ravel(best_model.predict(x_test))
print("")
print("Confusion matrix")
y_pred_best_model = label_encoder.inverse_transform(y_pred_best_model)
class_names = ["BENIGN", "Syn", "UDP", "UDPLag"]
bmcf = confusion_matrix(y_test, y_pred_best_model, labels=class_names)   #best model confusion matrix
row_format = "{:<10} {:<10} {:<10} {:<10} {:<10}"
print (row_format.format('', *class_names))
# One printed row per true class; iterating the matrix rows keeps every cell
# (including the UDP/UDP diagonal) tied to its own row.
for class_name, matrix_row in zip(class_names, bmcf):
  print (row_format.format(class_name, *matrix_row))
print("")
print("")
print("")
print(classification_report(y_test, y_pred_best_model, zero_division=1))

Stacking After Tuning best model is fold number 8 with accuracy of 0.97652

Confusion matrix
           BENIGN     Syn        UDP        UDPLag    
BENIGN     842        83         1          0         
Syn        0          19718      12         0         
UDP        5          13         12         0         
UDPLag     16         28         429        0         





  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.98      0.91      0.94       926
         Syn       0.99      1.00      1.00     19730
         UDP       0.90      1.00      0.94      3871
      UDPLag       1.00      0.00      0.00       473

    accuracy                           0.98     25000
   macro avg       0.97      0.73      0.72     25000
weighted avg       0.98      0.98      0.97     25000



In [None]:
#Random Forest before tuning model with best accuracy
# Select the fold whose Random Forest (before tuning) model reached the highest accuracy.
# The overall maximum across all folds is taken (ties resolve to the earliest fold),
# rather than the last fold that merely improved on its predecessor.
best_fold = max(range(total_fold), key=lambda fold_of: result_randomforest_beforetuning_data[fold_of]['accuracy'])
print("Random Forest Before Tuning best model is fold number " + str(best_fold+1) + " with accuracy of " + str(result_randomforest_beforetuning_data[best_fold]['accuracy']))

# Reload the winning fold's saved model and store a copy under the "best model" name.
# NOTE: pickle.load executes code embedded in the file; only load model files written by this notebook.
best_model_name = "drive/MyDrive/Dataset/SavedModels/"+ classifiers_name[0] + " fold" + str(best_fold+1) + ".sav"
with open(best_model_name, "rb") as saved_model_file:
  best_model = pickle.load(saved_model_file)
filename = "drive/MyDrive/Dataset/SavedModels/"+ classifiers_name[0] + " best model"+".sav"
with open(filename, 'wb') as best_model_file:
  pickle.dump(best_model, best_model_file)

# Predict on the test split. Predictions are flattened to 1-D because
# LabelEncoder.inverse_transform warns (DataConversionWarning) on column vectors.
y_pred_best_model = np.ravel(best_model.predict(x_test))
print("")
print("Confusion matrix")
y_pred_best_model = label_encoder.inverse_transform(y_pred_best_model)
class_names = ["BENIGN", "Syn", "UDP", "UDPLag"]
bmcf = confusion_matrix(y_test, y_pred_best_model, labels=class_names)   #best model confusion matrix
row_format = "{:<10} {:<10} {:<10} {:<10} {:<10}"
print (row_format.format('', *class_names))
# One printed row per true class; iterating the matrix rows keeps every cell
# (including the UDP/UDP diagonal) tied to its own row.
for class_name, matrix_row in zip(class_names, bmcf):
  print (row_format.format(class_name, *matrix_row))
print("")
print("")
print("")
print(classification_report(y_test, y_pred_best_model, zero_division=1))

Random Forest Before Tuning best model is fold number 9 with accuracy of 0.97984

Confusion matrix
           BENIGN     Syn        UDP        UDPLag    
BENIGN     926        0          0          0         
Syn        1          19717      12         0         
UDP        16         2          12         0         
UDPLag     41         4          428        0         





  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.94      1.00      0.97       926
         Syn       1.00      1.00      1.00     19730
         UDP       0.90      1.00      0.94      3871
      UDPLag       1.00      0.00      0.00       473

    accuracy                           0.98     25000
   macro avg       0.96      0.75      0.73     25000
weighted avg       0.98      0.98      0.97     25000



In [None]:
#Random Forest after tuning model with best accuracy
# Select the fold whose Random Forest (after tuning) model reached the highest accuracy.
# The overall maximum across all folds is taken (ties resolve to the earliest fold),
# rather than the last fold that merely improved on its predecessor.
best_fold = max(range(total_fold), key=lambda fold_of: result_randomforest_aftertuning_data[fold_of]['accuracy'])
print("Random Forest After Tuning best model is fold number " + str(best_fold+1) + " with accuracy of " + str(result_randomforest_aftertuning_data[best_fold]['accuracy']))

# Reload the winning fold's saved model and store a copy under the "best model" name.
# NOTE: pickle.load executes code embedded in the file; only load model files written by this notebook.
best_model_name = "drive/MyDrive/Dataset/SavedModels/"+ classifiers_name[1] + " fold" + str(best_fold+1) + ".sav"
with open(best_model_name, "rb") as saved_model_file:
  best_model = pickle.load(saved_model_file)
filename = "drive/MyDrive/Dataset/SavedModels/"+ classifiers_name[1] + " best model"+".sav"
with open(filename, 'wb') as best_model_file:
  pickle.dump(best_model, best_model_file)

# Predict on the test split. Predictions are flattened to 1-D because
# LabelEncoder.inverse_transform warns (DataConversionWarning) on column vectors.
y_pred_best_model = np.ravel(best_model.predict(x_test))
print("")
print("Confusion matrix")
y_pred_best_model = label_encoder.inverse_transform(y_pred_best_model)
class_names = ["BENIGN", "Syn", "UDP", "UDPLag"]
bmcf = confusion_matrix(y_test, y_pred_best_model, labels=class_names)   #best model confusion matrix
row_format = "{:<10} {:<10} {:<10} {:<10} {:<10}"
print (row_format.format('', *class_names))
# One printed row per true class; iterating the matrix rows keeps every cell
# (including the UDP/UDP diagonal) tied to its own row.
for class_name, matrix_row in zip(class_names, bmcf):
  print (row_format.format(class_name, *matrix_row))
print("")
print("")
print("")
print(classification_report(y_test, y_pred_best_model, zero_division=1))

Random Forest After Tuning best model is fold number 9 with accuracy of 0.95932

Confusion matrix
           BENIGN     Syn        UDP        UDPLag    
BENIGN     926        0          0          0         
Syn        921        18809      0          0         
UDP        19         0          0          0         
UDPLag     45         0          32         396       





  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

      BENIGN       0.48      1.00      0.65       926
         Syn       1.00      0.95      0.98     19730
         UDP       0.99      1.00      0.99      3871
      UDPLag       1.00      0.84      0.91       473

    accuracy                           0.96     25000
   macro avg       0.87      0.95      0.88     25000
weighted avg       0.98      0.96      0.97     25000

