In [0]:
# data analysis packages
import pandas as pd
import numpy as np

# loading bar
!pip install --force https://github.com/chengs/tqdm/archive/colab.zip
from tqdm import tqdm_notebook as tqdm

Collecting https://github.com/chengs/tqdm/archive/colab.zip
[?25l  Downloading https://github.com/chengs/tqdm/archive/colab.zip
[K     - 706kB 1.6MB/s
[?25hBuilding wheels for collected packages: tqdm
  Building wheel for tqdm (setup.py) ... [?25l[?25hdone
  Stored in directory: /tmp/pip-ephem-wheel-cache-cxgwz8su/wheels/41/18/ee/d5dd158441b27965855b1bbae03fa2d8a91fe645c01b419896
Successfully built tqdm
Installing collected packages: tqdm
  Found existing installation: tqdm 4.28.1
    Uninstalling tqdm-4.28.1:
      Successfully uninstalled tqdm-4.28.1
Successfully installed tqdm-4.28.1


In [0]:
from google.colab import drive
drive.mount('/content/drive/')

import os
os.chdir("/content/drive/My Drive/Dataset")

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


## Loading Datasets

In [0]:
full_benign_path = './full_benign_df.csv'

full_benign_df = pd.read_csv(full_benign_path)
full_benign_df.shape

(555932, 116)

In [0]:
full_mirai_path = './full_mirai_df.csv'

full_mirai_df = pd.read_csv(full_mirai_path)
full_mirai_df.shape

(3668402, 116)

In [0]:
full_bashlite_path = './full_bashlite_df.csv'

full_bashlite_df = pd.read_csv(full_bashlite_path)
full_bashlite_df.shape

(2838272, 116)

In [0]:
#Sampling 1% of each of the three classes

sampled_benign_df = full_benign_df.sample(frac =.01)

sampled_mirai_df = full_mirai_df.sample(frac =.01)

sampled_bashlite_df = full_bashlite_df.sample(frac =.01)

## Fishers Score

In [0]:
dataframes = [full_benign_df, full_mirai_df, full_bashlite_df]

sampled_dfs = [sampled_benign_df, sampled_mirai_df, sampled_bashlite_df]

#have to create a list with feature name and fisher score from it

total_points = (len(full_benign_df.index) + len(full_mirai_df.index) + len(full_bashlite_df.index))

feature_fisher_scores = pd.DataFrame(columns=['feature', 'fisher_score'])

features = []
fs = []
k = 0

for j in tqdm(full_benign_df.columns):
  numerator = 0
  denom = 0
  #for i in dataframes:
  for i in sampled_dfs:
    #pi = len(i.index)  / total_points
    pi = len(i.index)
    #i_frames = [full_benign_df.iloc[:,k], full_mirai_df.iloc[:,k], full_bashlite_df.iloc[:,k]]
    i_frames = [sampled_benign_df.iloc[:,k], sampled_mirai_df.iloc[:,k], sampled_bashlite_df.iloc[:,k]]
    
    full_df = pd.concat(i_frames)

    total_mean = np.mean(full_df)
    i_mean = np.mean((i.iloc[:,k]))

    mean_terms = ((total_mean - i_mean)**2)
    #mean_terms = (abs(total_mean - i_mean))
    
    numerator = (pi*mean_terms) + numerator
    
    sd = np.std((i.iloc[:,k]))
    denom = (pi*((sd)**2)) + denom
    #denom = (pi*(abs(sd))) + denom
  
  k = k + 1
  fisher_score = numerator/denom
  features.append(j)
  fs.append(fisher_score)

feature_fisher_scores['feature'] = features
feature_fisher_scores['fisher_score'] = fs

feature_fisher_scores.head()






Unnamed: 0,feature,fisher_score
0,Unnamed: 0,0.069945
1,MI_dir_L5_weight,0.492729
2,MI_dir_L5_mean,0.591789
3,MI_dir_L5_variance,0.506156
4,MI_dir_L3_weight,0.475006


In [0]:
feature_fisher_scores.sort_values(['fisher_score'], ascending=False).head(10)

Unnamed: 0,feature,fisher_score
13,MI_dir_L0.01_weight,1.084794
28,H_L0.01_weight,1.084794
14,MI_dir_L0.01_mean,0.766793
29,H_L0.01_mean,0.766779
11,MI_dir_L0.1_mean,0.75756
26,H_L0.1_mean,0.757559
8,MI_dir_L1_mean,0.726792
23,H_L1_mean,0.726792
30,H_L0.01_variance,0.701236
15,MI_dir_L0.01_variance,0.701235


# Models From Reduced Feature Sets

## Preprocessing the data

In [0]:
s_ben_df = sampled_benign_df

s_ben_df['label'] = 0

s_mir_df = sampled_mirai_df

s_mir_df['label'] = 1

s_bash_df = sampled_bashlite_df

s_bash_df['label'] = 2

s_bash_df = s_bash_df.drop(columns=['Unnamed: 0'])
s_mir_df = s_mir_df.drop(columns=['Unnamed: 0'])
s_ben_df = s_ben_df.drop(columns=['Unnamed: 0'])

frames = [s_bash_df, s_mir_df, s_ben_df]
s_combined_df = pd.concat(frames)

s_rand_combined_df = s_combined_df.sample(frac=1).reset_index(drop=True)

s_rand_combined_df.head(10)

Unnamed: 0,MI_dir_L5_weight,MI_dir_L5_mean,MI_dir_L5_variance,MI_dir_L3_weight,MI_dir_L3_mean,MI_dir_L3_variance,MI_dir_L1_weight,MI_dir_L1_mean,MI_dir_L1_variance,MI_dir_L0.1_weight,MI_dir_L0.1_mean,MI_dir_L0.1_variance,MI_dir_L0.01_weight,MI_dir_L0.01_mean,MI_dir_L0.01_variance,H_L5_weight,H_L5_mean,H_L5_variance,H_L3_weight,H_L3_mean,H_L3_variance,H_L1_weight,H_L1_mean,H_L1_variance,H_L0.1_weight,H_L0.1_mean,H_L0.1_variance,H_L0.01_weight,H_L0.01_mean,H_L0.01_variance,HH_L5_weight,HH_L5_mean,HH_L5_std,HH_L5_magnitude,HH_L5_radius,HH_L5_covariance,HH_L5_pcc,HH_L3_weight,HH_L3_mean,HH_L3_std,...,HH_jit_L0.1_variance,HH_jit_L0.01_weight,HH_jit_L0.01_mean,HH_jit_L0.01_variance,HpHp_L5_weight,HpHp_L5_mean,HpHp_L5_std,HpHp_L5_magnitude,HpHp_L5_radius,HpHp_L5_covariance,HpHp_L5_pcc,HpHp_L3_weight,HpHp_L3_mean,HpHp_L3_std,HpHp_L3_magnitude,HpHp_L3_radius,HpHp_L3_covariance,HpHp_L3_pcc,HpHp_L1_weight,HpHp_L1_mean,HpHp_L1_std,HpHp_L1_magnitude,HpHp_L1_radius,HpHp_L1_covariance,HpHp_L1_pcc,HpHp_L0.1_weight,HpHp_L0.1_mean,HpHp_L0.1_std,HpHp_L0.1_magnitude,HpHp_L0.1_radius,HpHp_L0.1_covariance,HpHp_L0.1_pcc,HpHp_L0.01_weight,HpHp_L0.01_mean,HpHp_L0.01_std,HpHp_L0.01_magnitude,HpHp_L0.01_radius,HpHp_L0.01_covariance,HpHp_L0.01_pcc,label
0,146.055453,352.575175,58931.903411,232.135309,348.518825,59285.180744,650.351728,364.839501,57662.916172,6415.320351,381.100014,55515.473715,59471.354403,383.817678,55104.838944,146.055453,352.575175,58931.903411,232.135309,348.518825,59285.180744,650.351728,364.839501,57662.916172,6415.320351,381.100014,55515.473715,59471.354403,383.817678,55104.838944,86.502428,554.0,0.0,554.0,0.0,0.0,0.0,135.57774,554.0,1.5e-05,...,3747.075,38985.832563,2958.823,4460872000000.0,1.0,554.0,0.0,554.0,0.0,0.0,0.0,1.0,554.0,0.0,554.0,0.0,0.0,0.0,1.0,554.0,0.0,554.0,0.0,0.0,0.0,1.0,554.0,0.0,554.0,0.0,0.0,0.0,1.0,554.0,0.0,554.0,0.0,0.0,0.0,1
1,1.0,60.0,0.0,1.0,60.0,0.0,1.0,60.0,0.0,1.0,60.0,0.0,1.0,60.0,0.0,1.0,60.0,0.0,1.0,60.0,0.0,1.0,60.0,0.0,1.0,60.0,0.0,1.0,60.0,0.0,1.0,60.0,0.0,60.0,0.0,0.0,0.0,1.0,60.0,0.0,...,0.0,1.0,1505914000.0,0.0,1.0,60.0,0.0,60.0,0.0,0.0,0.0,1.0,60.0,0.0,60.0,0.0,0.0,0.0,1.0,60.0,0.0,60.0,0.0,0.0,0.0,1.0,60.0,0.0,60.0,0.0,0.0,0.0,1.0,60.0,0.0,60.0,0.0,0.0,0.0,2
2,175.34858,74.007747,0.18588,281.171798,74.016984,0.407329,809.265693,74.032642,0.786749,7333.704106,74.086616,20.064436,36414.069953,74.263424,158.190163,175.34858,74.007747,0.18588,281.171798,74.016984,0.407329,809.265693,74.032642,0.786749,7333.704106,74.086616,20.064436,36414.069953,74.263424,158.190163,175.291975,74.0,2e-06,95.268043,4.897779e-12,-1.078901e-28,-3.751296e-17,280.972821,74.0,2.3e-05,...,366308500000.0,36307.262557,21137.74,31831170000000.0,1.0,74.0,0.0,95.268043,0.0,0.0,0.0,1.0,74.0,0.0,95.268043,0.0,0.0,0.0,1.0,74.0,0.0,95.268043,0.0,0.0,0.0,1.020403,74.0,0.0,95.268043,0.0,0.0,0.0,1.677593,74.0,0.0,95.268043,0.0,0.0,0.0,2
3,33.587863,74.087364,2.093366,64.452935,74.119794,3.37841,159.182945,74.404193,58.408251,808.503099,75.184467,239.672595,2569.864737,75.088108,253.391223,33.587863,74.087364,2.093366,64.452935,74.119794,3.37841,159.182945,74.404193,58.408251,808.503099,75.184467,239.672595,2569.864737,75.088108,253.391223,1.0,74.0,0.0,74.0,0.0,0.0,0.0,1.0,74.0,0.0,...,0.0,1.0,1505913000.0,0.0,1.0,74.0,0.0,74.0,0.0,0.0,0.0,1.0,74.0,0.0,74.0,0.0,0.0,0.0,1.0,74.0,0.0,74.0,0.0,0.0,0.0,1.0,74.0,0.0,74.0,0.0,0.0,0.0,1.0,74.0,0.0,74.0,0.0,0.0,0.0,2
4,111.590034,547.290335,9117.034019,190.203964,516.685384,22521.099828,600.858552,463.198428,41446.404958,6160.585395,439.486741,48002.663049,47697.331151,437.246988,48552.728344,111.590034,547.290335,9117.034019,190.203964,516.685384,22521.099828,600.858552,463.198428,41446.404958,6160.585395,439.486741,48002.663049,47697.331151,437.246988,48552.728344,107.463921,566.0,1.3e-05,566.0,1.74623e-10,0.0,0.0,171.666614,566.0,0.003396,...,223302400.0,35564.814337,9839.788,14834940000000.0,1.0,566.0,0.0,566.0,0.0,0.0,0.0,1.0,566.0,0.0,566.0,0.0,0.0,0.0,1.0,566.0,0.0,566.0,0.0,0.0,0.0,1.0,566.0,0.0,566.0,0.0,0.0,0.0,1.0,566.0,0.0,566.0,0.0,0.0,0.0,1
5,30.760928,60.000493,0.005472,42.31409,60.006632,0.07397,143.624242,60.045958,0.611988,1913.853148,60.088903,1.581607,18604.200918,60.098333,1.69365,30.760928,60.000493,0.005472,42.31409,60.006632,0.07397,143.624242,60.045958,0.611988,1913.853148,60.088903,1.581607,18604.200918,60.098333,1.69365,1.0,60.0,0.0,60.0,0.0,0.0,0.0,1.0,60.0,0.0,...,0.0,1.0,1507653000.0,0.0,1.0,60.0,0.0,60.0,0.0,0.0,0.0,1.0,60.0,0.0,60.0,0.0,0.0,0.0,1.0,60.0,0.0,60.0,0.0,0.0,0.0,1.0,60.0,0.0,60.0,0.0,0.0,0.0,1.0,60.0,0.0,60.0,0.0,0.0,0.0,1
6,1.0,60.0,0.0,1.0,60.0,0.0,1.0,60.0,0.0,1.0,60.0,0.0,1.0,60.0,0.0,1.0,60.0,0.0,1.0,60.0,0.0,1.0,60.0,0.0,1.0,60.0,0.0,1.0,60.0,0.0,1.0,60.0,0.0,60.0,0.0,0.0,0.0,1.0,60.0,0.0,...,0.0,1.0,1505914000.0,0.0,1.0,60.0,0.0,60.0,0.0,0.0,0.0,1.0,60.0,0.0,60.0,0.0,0.0,0.0,1.0,60.0,0.0,60.0,0.0,0.0,0.0,1.0,60.0,0.0,60.0,0.0,0.0,0.0,1.0,60.0,0.0,60.0,0.0,0.0,0.0,2
7,1.0,60.0,0.0,1.0,60.0,0.0,1.0,60.0,0.0,1.0,60.0,0.0,1.0,60.0,0.0,1.0,60.0,0.0,1.0,60.0,0.0,1.0,60.0,0.0,1.0,60.0,0.0,1.0,60.0,0.0,1.0,60.0,0.0,60.0,0.0,0.0,0.0,1.0,60.0,0.0,...,0.0,1.0,1505914000.0,0.0,1.0,60.0,0.0,60.0,0.0,0.0,0.0,1.0,60.0,0.0,60.0,0.0,0.0,0.0,1.0,60.0,0.0,60.0,0.0,0.0,0.0,1.0,60.0,0.0,60.0,0.0,0.0,0.0,1.0,60.0,0.0,60.0,0.0,0.0,0.0,2
8,70.52512,473.703985,38183.192784,146.019579,445.928334,46338.635228,553.088909,441.974084,47369.604048,5748.712855,428.870906,50533.813861,51040.671019,435.648524,48933.794422,70.52512,473.703985,38183.192784,146.019579,445.928334,46338.635228,553.088909,441.974084,47369.604048,5748.712855,428.870906,50533.813861,51040.671019,435.648524,48933.794422,57.661103,566.0,1.5e-05,566.0,2.328306e-10,0.0,0.0,111.369501,566.0,0.0,...,11249610.0,37892.746403,6783.018,10226420000000.0,1.0,566.0,0.0,566.0,0.0,0.0,0.0,1.0,566.0,0.0,566.0,0.0,0.0,0.0,1.0,566.0,0.0,566.0,0.0,0.0,0.0,1.0,566.0,0.0,566.0,0.0,0.0,0.0,1.0,566.0,0.0,566.0,0.0,0.0,0.0,1
9,96.831874,529.187391,11640.484325,160.423513,489.84246,27570.867003,499.872958,423.854652,47328.214088,5429.129438,406.945049,51005.687536,34229.413989,401.622985,52031.432271,96.831874,529.187391,11640.484325,160.423513,489.84246,27570.867003,499.872958,423.854652,47328.214088,5429.129438,406.945049,51005.687536,34229.413989,401.622985,52031.432271,91.967685,554.0,1.3e-05,554.0,1.74623e-10,0.0,0.0,139.584161,554.0,1.1e-05,...,9948577000.0,23673.229716,21197.77,31958560000000.0,91.967685,554.0,1.3e-05,554.0,1.74623e-10,0.0,0.0,139.584161,554.0,1.1e-05,554.0,1.164153e-10,0.0,0.0,368.126046,554.0,1.1e-05,554.0,1.164153e-10,0.0,0.0,3812.647144,554.0,4.9e-05,554.0,2.444722e-09,0.0,0.0,23667.63606,554.0,1.9e-05,554.0,3.49246e-10,0.0,0.0,1


## Two Features


*   Two highest scoring features from the paper:
** Host_MAC&IP-10sec-Variance
** Host_IP-10sec-Variance

*   Two highest scoring features from this implementation:
** Host_MAC&IP-100ms-Packet Count
** Host_IP-100ms-Packet Count



### Decision Trees

In [0]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split


X = s_rand_combined_df[['H_L5_mean', 'MI_dir_L5_mean']].copy()

#X = s_rand_combined_df.drop(['label'],axis=1).values

Y = s_rand_combined_df['label'].values

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=1)

dt = DecisionTreeClassifier()

dt = dt.fit(X_train,Y_train)

Y_pred = dt.predict(X_test)

dt_acc = dt.score(X_test, Y_test)
dt_acc

0.9227392863885218

### K-Nearest Neighbour

In [0]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

X = s_rand_combined_df[['H_L5_mean', 'MI_dir_L5_mean']].copy()

#X = s_rand_combined_df.drop(['label'],axis=1).values

Y = s_rand_combined_df['label'].values

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=1)

nn = KNeighborsClassifier()

nn = nn.fit(X_train,Y_train)

Y_pred = nn.predict(X_test)

nn_acc = nn.score(X_test, Y_test)
nn_acc


0.9601189352463658

## Three Features


### Decision Tree

In [0]:


X = s_rand_combined_df[['H_L5_mean', 'MI_dir_L5_mean', 'MI_dir_L3_mean']].copy()

Y = s_rand_combined_df['label'].values

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=1)

dt = DecisionTreeClassifier()

dt = dt.fit(X_train,Y_train)

Y_pred = dt.predict(X_test)

dt_acc = dt.score(X_test, Y_test)
dt_acc



0.9599773456673589

### K-Nearest Neighbour

In [0]:
X = s_rand_combined_df[['H_L5_mean', 'MI_dir_L5_mean', 'MI_dir_L3_mean']].copy()

Y = s_rand_combined_df['label'].values

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=1)

nn = KNeighborsClassifier()

nn = nn.fit(X_train,Y_train)

Y_pred = nn.predict(X_test)

nn_acc = nn.score(X_test, Y_test)
nn_acc

0.98074381725505

## Ten Features

### Decision Tree

In [0]:
X = s_rand_combined_df[['H_L5_mean', 'MI_dir_L5_mean', 'MI_dir_L3_mean' ,'H_L3_mean', 'H_L5_variance', 
                        'MI_dir_L5_variance', 'MI_dir_L1_variance', 'H_L1_variance', 'H_L3_variance', 'MI_dir_L3_variance']].copy()

Y = s_rand_combined_df['label'].values

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=1)

dt = DecisionTreeClassifier(random_state=42)

dt = dt.fit(X_train,Y_train)

Y_pred = dt.predict(X_test)

dt_acc = dt.score(X_test, Y_test)
dt_acc

0.9885784406267699

### K-Nearest Neighbour

In [0]:
X = s_rand_combined_df[['H_L5_mean', 'MI_dir_L5_mean', 'MI_dir_L3_mean' ,'H_L3_mean', 'H_L5_variance', 
                        'MI_dir_L5_variance', 'MI_dir_L1_variance', 'H_L1_variance', 'H_L3_variance', 'MI_dir_L3_variance']].copy()

Y = s_rand_combined_df['label'].values

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=1)

nn = KNeighborsClassifier(n_neighbors=3)

nn = nn.fit(X_train,Y_train)

Y_pred = nn.predict(X_test)

nn_acc = nn.score(X_test, Y_test)
nn_acc


0.9815461582027563

# Feature Category Results

## Host-IP

In [0]:
X = s_rand_combined_df.iloc[:,16:30]

Y = s_rand_combined_df['label'].values

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=1)

dt = DecisionTreeClassifier()

dt = dt.fit(X_train,Y_train)

Y_pred = dt.predict(X_test)

dt_acc = dt.score(X_test, Y_test)
dt_acc


0.999716820841986

## Host-MAC&IP

In [0]:

X = s_rand_combined_df.iloc[:,0:15]

Y = s_rand_combined_df['label'].values

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=1)

dt = DecisionTreeClassifier()

dt = dt.fit(X_train,Y_train)

Y_pred = dt.predict(X_test)

dt_acc = dt.score(X_test, Y_test)
dt_acc


0.9996696243156503

## Channel

In [0]:


X = s_rand_combined_df.iloc[:,30:65]

Y = s_rand_combined_df['label'].values

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=1)

dt = DecisionTreeClassifier()

dt = dt.fit(X_train,Y_train)

Y_pred = dt.predict(X_test)

dt_acc = dt.score(X_test, Y_test)
dt_acc


0.7754861242212573

## Network Jitter

In [0]:
X = s_rand_combined_df.iloc[:,65:80]

Y = s_rand_combined_df['label'].values

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=1)

dt = DecisionTreeClassifier()

dt = dt.fit(X_train,Y_train)

Y_pred = dt.predict(X_test)

dt_acc = dt.score(X_test, Y_test)
dt_acc

0.9982065319992448

## Socket

In [0]:
X = s_rand_combined_df.iloc[:,80:115]

Y = s_rand_combined_df['label'].values

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=1)

dt = DecisionTreeClassifier()

dt = dt.fit(X_train,Y_train)

Y_pred = dt.predict(X_test)

dt_acc = dt.score(X_test, Y_test)
dt_acc

0.6983198036624504

# Time Interval Results

## 100 ms

In [0]:

x1 = s_rand_combined_df.iloc[:,12:15]
x2 = s_rand_combined_df.iloc[:,27:30]
x3 = s_rand_combined_df.iloc[:,58:65]
x4 = s_rand_combined_df.iloc[:,77:80]
x5 = s_rand_combined_df.iloc[:,108:115]

x_100_frames = [x1, x2, x3, x4, x5]

X = pd.concat(x_100_frames, axis=1)

Y = s_rand_combined_df['label'].values

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=1)

dt = DecisionTreeClassifier()

dt = dt.fit(X_train,Y_train)

Y_pred = dt.predict(X_test)

dt_acc = dt.score(X_test, Y_test)
dt_acc

0.9996696243156503

## 500 ms

In [0]:
x1 = s_rand_combined_df.iloc[:,9:12]
x2 = s_rand_combined_df.iloc[:,24:27]
x3 = s_rand_combined_df.iloc[:,51:58]
x4 = s_rand_combined_df.iloc[:,74:77]
x5 = s_rand_combined_df.iloc[:,101:108]

x_500_frames = [x1, x2, x3, x4, x5]

X = pd.concat(x_100_frames, axis=1)

Y = s_rand_combined_df['label'].values

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=1)

dt = DecisionTreeClassifier()

dt = dt.fit(X_train,Y_train)

Y_pred = dt.predict(X_test)

dt_acc = dt.score(X_test, Y_test)
dt_acc

0.9996224277893148

## 1.5 sec

In [0]:
x1 = s_rand_combined_df.iloc[:,6:9]
x2 = s_rand_combined_df.iloc[:,21:24]
x3 = s_rand_combined_df.iloc[:,44:51]
x4 = s_rand_combined_df.iloc[:,71:74]
x5 = s_rand_combined_df.iloc[:,94:101]

x_15_frames = [x1, x2, x3, x4, x5]

X = pd.concat(x_100_frames, axis=1)

Y = s_rand_combined_df['label'].values

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=1)

dt = DecisionTreeClassifier()

dt = dt.fit(X_train,Y_train)

Y_pred = dt.predict(X_test)

dt_acc = dt.score(X_test, Y_test)
dt_acc

0.9996696243156503

## 10 sec

In [0]:
x1 = s_rand_combined_df.iloc[:,3:6]
x2 = s_rand_combined_df.iloc[:,18:21]
x3 = s_rand_combined_df.iloc[:,37:44]
x4 = s_rand_combined_df.iloc[:,68:71]
x5 = s_rand_combined_df.iloc[:,87:94]

x_10_frames = [x1, x2, x3, x4, x5]

X = pd.concat(x_100_frames, axis=1)

Y = s_rand_combined_df['label'].values

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=1)

dt = DecisionTreeClassifier()

dt = dt.fit(X_train,Y_train)

Y_pred = dt.predict(X_test)

dt_acc = dt.score(X_test, Y_test)
dt_acc

0.9996224277893148

## 1 min

In [0]:
x1 = s_rand_combined_df.columns[0:3]
x2 = s_rand_combined_df.columns[15:18]
x3 = s_rand_combined_df.columns[30:37]
x4 = s_rand_combined_df.columns[65:68]
x5 = s_rand_combined_df.columns[80:87]

x_1_frames = [x1, x2, x3, x4, x5]

X = pd.concat(x_100_frames, axis=1)

Y = s_rand_combined_df['label'].values

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=1)

dt = DecisionTreeClassifier()

dt = dt.fit(X_train,Y_train)

Y_pred = dt.predict(X_test)

dt_acc = dt.score(X_test, Y_test)
dt_acc

0.9996224277893148

# Device-Based Modeling

# Balanced Vs Skewed

## Unbalanced: Skewed to Normal

In [0]:
#Numbers taken from the specificatioons in the paper

unb_benign_df = full_benign_df.sample(n=49683)
unb_mirai_df = full_mirai_df.sample(n=2870)
unb_bashlite_df = full_bashlite_df.sample(n=2798)

unb_benign_df = unb_benign_df.drop(columns=['Unnamed: 0'])
unb_mirai_df = unb_mirai_df.drop(columns=['Unnamed: 0'])
unb_bashlite_df = unb_bashlite_df.drop(columns=['Unnamed: 0'])

unb_benign_df['label'] = 0

unb_mirai_df['label'] = 1

unb_bashlite_df['label'] = 2

frames = [unb_benign_df, unb_mirai_df, unb_bashlite_df]
unb_combined_df = pd.concat(frames)

unb1_rand_combined_df = unb_combined_df.sample(frac=1).reset_index(drop=True)


In [0]:
X = unb1_rand_combined_df[['H_L5_mean', 'MI_dir_L5_mean', 'MI_dir_L3_mean']].copy()

Y = unb1_rand_combined_df['label'].values

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=1)

dt = DecisionTreeClassifier()

dt = dt.fit(X_train,Y_train)

Y_pred = dt.predict(X_test)

dt_acc = dt.score(X_test, Y_test)
dt_acc

0.941948693243406

## Unbalanced: Skewed to Attacks

In [0]:
#Numbers taken from the specificatioons in the paper

unb_benign_df = full_benign_df.sample(n=5025)
unb_mirai_df = full_mirai_df.sample(n=29351)
unb_bashlite_df = full_bashlite_df.sample(n=28382)

unb_benign_df = unb_benign_df.drop(columns=['Unnamed: 0'])
unb_mirai_df = unb_mirai_df.drop(columns=['Unnamed: 0'])
unb_bashlite_df = unb_bashlite_df.drop(columns=['Unnamed: 0'])

unb_benign_df['label'] = 0

unb_mirai_df['label'] = 1

unb_bashlite_df['label'] = 2

frames = [unb_benign_df, unb_mirai_df, unb_bashlite_df]
unb_combined_df = pd.concat(frames)

unb2_rand_combined_df = unb_combined_df.sample(frac=1).reset_index(drop=True)

In [0]:
X = unb1_rand_combined_df[['H_L5_mean', 'MI_dir_L5_mean', 'MI_dir_L3_mean']].copy()

Y = unb1_rand_combined_df['label'].values

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=1)

dt = DecisionTreeClassifier()

dt = dt.fit(X_train,Y_train)

Y_pred = dt.predict(X_test)

dt_acc = dt.score(X_test, Y_test)
dt_acc

0.942370227628568

## Balanced

In [0]:
#Numbers taken from the specificatioons in the paper

unb_benign_df = full_benign_df.sample(n=4000)
unb_mirai_df = full_mirai_df.sample(n=4000)
unb_bashlite_df = full_bashlite_df.sample(n=4000)

unb_benign_df = unb_benign_df.drop(columns=['Unnamed: 0'])
unb_mirai_df = unb_mirai_df.drop(columns=['Unnamed: 0'])
unb_bashlite_df = unb_bashlite_df.drop(columns=['Unnamed: 0'])

unb_benign_df['label'] = 0

unb_mirai_df['label'] = 1

unb_bashlite_df['label'] = 2

frames = [unb_benign_df, unb_mirai_df, unb_bashlite_df]
unb_combined_df = pd.concat(frames)

unb2_rand_combined_df = unb_combined_df.sample(frac=1).reset_index(drop=True)

In [0]:
X = unb1_rand_combined_df[['H_L5_mean', 'MI_dir_L5_mean', 'MI_dir_L3_mean']].copy()

Y = unb1_rand_combined_df['label'].values

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=1)

dt = DecisionTreeClassifier()

dt = dt.fit(X_train,Y_train)

Y_pred = dt.predict(X_test)

dt_acc = dt.score(X_test, Y_test)
dt_acc

0.9423100084306877