In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import StackingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score

In [2]:
# Load the ground-truth table. Despite the .csv extension the file is parsed as
# tab-separated; read_table uses '\t' as its default delimiter.
df = pd.read_table('ground_truth.csv')

In [3]:
# Separate the feature matrix from the label. The label is selected by name so
# that it always matches the column removed from `x`, whatever the column order.
# NOTE(review): assumes 'attackerType' is the column that was previously picked
# by position (index 2) - confirm against the file header.
x = df.drop(columns=['attackerType'])
y = df['attackerType']

# Rebalance the classes with SMOTE over-sampling followed by Tomek-link cleaning.
# `fit_resample` is the current imbalanced-learn API; the older `fit_sample`
# alias no longer exists in recent releases.
# NOTE(review): resampling is done before the train/test split, so synthetic
# rows interpolated from training samples can end up in the test set. Consider
# splitting first and resampling only the training portion.
from imblearn.combine import SMOTETomek
smk = SMOTETomek(random_state=0)
x, y = smk.fit_resample(x, y)

In [4]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation; the fixed seed keeps the partition
# identical between runs.
(x_train, x_test,
 y_train, y_test) = train_test_split(x, y, random_state=0, test_size=0.3)

In [5]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training rows only, then apply that
# same transformation to both partitions.
sc = StandardScaler().fit(x_train)
x_train = sc.transform(x_train)
x_test = sc.transform(x_test)

In [6]:
# Stacked ensemble: a random forest and a scaled AdaBoost model act as base
# learners, and a second AdaBoost model combines their predictions.
# Each stochastic learner is seeded so that repeated runs build the same model.
estimators = [
    ('rfc', RandomForestClassifier(random_state=0)),
    # Labelled 'ada' because the pipeline wraps AdaBoost, not a k-NN model.
    ('ada', make_pipeline(StandardScaler(),
                          AdaBoostClassifier(random_state=0))),
]

reg = StackingClassifier(
    estimators=estimators,
    final_estimator=AdaBoostClassifier(random_state=0),
)

In [7]:
# Train the stacked model and predict labels for the held-out rows in one chain
# (fit returns the fitted estimator itself).
y_pred = reg.fit(x_train, y_train).predict(x_test)

In [8]:
from sklearn.metrics import confusion_matrix

# Compare the predictions with the held-out labels: per-class confusion counts
# first, then overall accuracy as a percentage.
acc = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)

print(cm)
print("accuracy score %0.2f%%" % (100 * acc))

[[10239     0     0     0     0   297]
 [    1    11     0     0     0     0]
 [   47     0     0     0     0     0]
 [    5     0     0     0     0   218]
 [  146     0     0     0     0  1716]
 [  111     0     0     0     0  1780]]
accuracy score 82.56%


In [9]:
# Stacked ensemble: random forest + scaled AdaBoost as base learners, with a
# k-nearest-neighbours classifier as the meta-learner.
# The stochastic base learners are seeded for repeatable results; the k-NN
# meta-learner takes no seed.
estimators = [
    ('rfc', RandomForestClassifier(random_state=0)),
    # Labelled 'ada' because the pipeline wraps AdaBoost, not a k-NN model.
    ('ada', make_pipeline(StandardScaler(),
                          AdaBoostClassifier(random_state=0))),
]

reg = StackingClassifier(
    estimators=estimators,
    final_estimator=KNeighborsClassifier(),
)

In [10]:
# Fit on the training partition, then label the held-out rows.
reg.fit(X=x_train, y=y_train)
y_pred = reg.predict(X=x_test)

In [11]:
from sklearn.metrics import confusion_matrix

# Report the confusion matrix followed by the accuracy (in percent) for the
# current predictions.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print(cm, "accuracy score %0.2f%%" % (acc * 100), sep="\n")

[[10257     0     1     5   130   143]
 [    0    12     0     0     0     0]
 [    1     0    46     0     0     0]
 [    6     0     0   217     0     0]
 [  156     0     0     0  1694    12]
 [  154     0     0     0    21  1716]]
accuracy score 95.68%


In [12]:
# Stacked ensemble: random forest + scaled AdaBoost as base learners, with an
# XGBoost classifier as the meta-learner.
# Every learner is seeded so that repeated runs build the same model.
estimators = [
    ('rfc', RandomForestClassifier(random_state=0)),
    # Labelled 'ada' because the pipeline wraps AdaBoost, not a k-NN model.
    ('ada', make_pipeline(StandardScaler(),
                          AdaBoostClassifier(random_state=0))),
]

reg = StackingClassifier(
    estimators=estimators,
    final_estimator=XGBClassifier(random_state=0),
)

In [13]:
# Train the ensemble, then predict the held-out partition; fit() hands back the
# fitted estimator, so the two steps can be chained.
y_pred = reg.fit(x_train, y_train).predict(x_test)

In [14]:
from sklearn.metrics import confusion_matrix

# Hold-out evaluation: confusion counts per class, then overall accuracy.
cm = confusion_matrix(y_test, y_pred)
acc = accuracy_score(y_test, y_pred)

print(cm)
print("accuracy score %0.2f%%" % (100 * acc,))

[[10272     2     4    11   118   129]
 [    0    12     0     0     0     0]
 [    3     0    44     0     0     0]
 [    8     0     0   215     0     0]
 [  169     0     0     0  1678    15]
 [  139     0     0     0    16  1736]]
accuracy score 95.79%


In [15]:
# Stacked ensemble: XGBoost and a scaled random forest as base learners, with a
# second XGBoost classifier as the meta-learner.
# Every learner is seeded so that repeated runs build the same model.
estimators = [
    ('xgb', XGBClassifier(random_state=0)),
    # Labelled 'rfc' because the pipeline wraps a random forest, not k-NN.
    ('rfc', make_pipeline(StandardScaler(),
                          RandomForestClassifier(random_state=0))),
]

reg = StackingClassifier(
    estimators=estimators,
    final_estimator=XGBClassifier(random_state=0),
)

In [16]:
# Fit the stack on the training rows and score the held-out rows.
reg.fit(X=x_train, y=y_train)
y_pred = reg.predict(X=x_test)

In [17]:
from sklearn.metrics import confusion_matrix

# Evaluate the latest predictions: print the confusion matrix and then the
# accuracy as a percentage with two decimals.
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm, "accuracy score %0.2f%%" % (acc * 100), sep="\n")

[[10455     0     0     0    37    44]
 [    0    12     0     0     0     0]
 [    4     0    43     0     0     0]
 [    1     0     0   222     0     0]
 [   47     0     0     0  1813     2]
 [   35     0     0     0     3  1853]]
accuracy score 98.81%


In [24]:
# Baseline: a single AdaBoost model (50 rounds, learning rate 1.0) using
# algorithm='SAMME'. Seeded so that repeated runs give the same result.
# NOTE(review): recent scikit-learn releases deprecate the `algorithm`
# argument - confirm against the installed version.
reg = AdaBoostClassifier(
    n_estimators=50,
    learning_rate=1.0,
    algorithm='SAMME',
    random_state=0,
)
reg.fit(x_train, y_train)
y_pred = reg.predict(x_test)

In [25]:
from sklearn.metrics import confusion_matrix

# Score the AdaBoost predictions against the held-out labels.
cm = confusion_matrix(y_test, y_pred)
acc = accuracy_score(y_test, y_pred)

print(cm)
print("accuracy score %0.2f%%" % (100 * acc))

[[10233     0     0    54   219    30]
 [    0     0     0    12     0     0]
 [   45     0     0     2     0     0]
 [  223     0     0     0     0     0]
 [ 1819     0     0     0    38     5]
 [ 1819     0     0     0    58    14]]
accuracy score 70.59%


In [22]:
# Baseline: a single AdaBoost model (50 rounds) using algorithm='SAMME.R'.
# Seeded so that repeated runs give the same result.
# NOTE(review): 'SAMME.R' is deprecated in recent scikit-learn releases and is
# rejected by the newest ones - confirm against the installed version before
# re-running this comparison.
reg = AdaBoostClassifier(
    n_estimators=50,
    algorithm='SAMME.R',
    random_state=0,
)
reg.fit(x_train, y_train)
y_pred = reg.predict(x_test)

In [23]:
from sklearn.metrics import confusion_matrix

# Confusion counts and percentage accuracy for the current predictions.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print(cm, "accuracy score %0.2f%%" % (acc * 100), sep="\n")

[[5525    0    0    0 5011    0]
 [  12    0    0    0    0    0]
 [  47    0    0    0    0    0]
 [ 223    0    0    0    0    0]
 [ 855    0    0    0 1007    0]
 [ 903    0    0    0  988    0]]
accuracy score 44.83%


In [26]:
# Rebalance the classes with SMOTE over-sampling plus Tomek-link cleaning.
# `fit_resample` is the current imbalanced-learn API; the older `fit_sample`
# alias no longer exists in recent releases.
# NOTE(review): when the notebook is run top to bottom, `x` and `y` have
# already been resampled by the earlier SMOTETomek cell, so this is a second
# pass over already-balanced data. It also runs before the split, so synthetic
# rows can reach the test set. Confirm which of the two cells is intended.
from imblearn.combine import SMOTETomek
smk = SMOTETomek(random_state=0)
x, y = smk.fit_resample(x, y)

In [27]:
from sklearn.model_selection import train_test_split

# Re-create the 70/30 partition from the resampled data, using the same seed as
# the earlier split.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=0,
)

In [28]:
from sklearn.preprocessing import StandardScaler

# Standardise both partitions with statistics learned from the training rows.
# The tuple is evaluated left to right, so the scaler is fitted before the test
# rows are transformed.
sc = StandardScaler()
x_train, x_test = sc.fit_transform(x_train), sc.transform(x_test)

In [29]:
# Stacked ensemble for the rebalanced data: XGBoost and a scaled random forest
# as base learners, with a second XGBoost classifier as the meta-learner.
# Every learner is seeded so that repeated runs build the same model.
estimators = [
    ('xgb', XGBClassifier(random_state=0)),
    # Labelled 'rfc' because the pipeline wraps a random forest, not k-NN.
    ('rfc', make_pipeline(StandardScaler(),
                          RandomForestClassifier(random_state=0))),
]

reg = StackingClassifier(
    estimators=estimators,
    final_estimator=XGBClassifier(random_state=0),
)

In [30]:
# Fit on the rebalanced training rows and predict the held-out rows; fit()
# returns the estimator, which allows the chained call.
y_pred = reg.fit(x_train, y_train).predict(x_test)

In [31]:
from sklearn.metrics import confusion_matrix

# Final evaluation on the rebalanced split: confusion matrix, then accuracy.
acc = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)

print(cm)
print("accuracy score %0.2f%%" % (100 * acc))

[[ 8853     1     0    10   207   195]
 [    0 10466     0     0     0     0]
 [    0     0 10389     0     0     0]
 [   16     0     0 10505     0     0]
 [  175     0     0     0  9527   104]
 [  178     0     0     0    60  9487]]
accuracy score 98.43%
