In [95]:
import tensorflow as tf
import sklearn 
import pandas as pd 
import numpy as np
from numpy import unique
from sklearn import feature_selection
from itertools import combinations
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import LabelEncoder 
from sklearn import preprocessing as prp
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report


In [96]:
#Reading Dataset into a dataframe
#Malicious_data = pd.read_csv("C:\Jupyter running\Malicious.csv", index_col=0) 
Malicious_data = pd.read_csv("C:\Jupyter running\DrDoS_DNS.csv", index_col=0, low_memory=False)

  mask |= (ar1 == a)


In [97]:
#Printing no of rows and columns of dataframe
Malicious_data.shape

(1048575, 87)

In [98]:
#Separating features and label 
Malicious_features = Malicious_data.copy()
Malicious_labels = Malicious_features.pop('TrafficNature')
print("Features Dataframe Shape after splitting label:\n",Malicious_features.shape)
print("Label Dataframe shape after splitting label:\n",Malicious_labels.shape)

Features Dataframe Shape after splitting label:
 (1048575, 86)
Label Dataframe shape after splitting label:
 (1048575,)


In [99]:
# To get number of unique values
counts=Malicious_features.nunique()
#To delete the columns with only 1 unique value
to_del=[i for i,v in enumerate(counts) if v==1]
colname = Malicious_features.columns[to_del]
Malicious_features.drop(colname, axis=1, inplace= True)
print("Features Dataframe Shape after Deleting columns with only 1 unique value:\n",Malicious_features.shape)

Features Dataframe Shape after Deleting columns with only 1 unique value:
 (1048575, 74)


In [100]:
#Deleting the columns with less variance
to_del=[i for i,v in enumerate(Malicious_features.nunique()) if float(v)/Malicious_features.shape[0]* 100 < 1]
colname = Malicious_features.columns[to_del]
Malicious_features.drop(colname, axis=1, inplace= True)
print("Features Dataframe Shape after Deleting columns with less variance:\n",Malicious_features.shape)

Features Dataframe Shape after Deleting columns with less variance:
 (1048575, 11)


In [101]:
# Deleting columns of datatype object
String_columns=Malicious_features.columns[Malicious_features.dtypes == 'object']
Malicious_features.drop(String_columns,axis=1, inplace= True)
print("Features Dataframe Shape after Deleting columns of object(string) datatype:\n",Malicious_features.shape)

Features Dataframe Shape after Deleting columns of object(string) datatype:
 (1048575, 10)


In [102]:
#Deleting columns with infinity values
col_name = Malicious_features.columns.to_series()[np.isinf(Malicious_features).any()] 
Malicious_features.drop(columns=col_name, inplace=True)
print("Features Dataframe Shape after Deleting columns with infinity values:\n",Malicious_features.shape)

Features Dataframe Shape after Deleting columns with infinity values:
 (1048575, 8)


In [103]:
#selecting top 20 features from the available features
select=sklearn.feature_selection.SelectKBest(k=5)
selected_features= select.fit(Malicious_features,Malicious_labels)
indices_selected=selected_features.get_support(indices=True)
colnames_selected = [Malicious_features.columns[i] for i in indices_selected]
Malicious_features=Malicious_features[colnames_selected]
print("Features Dataframe Shape after selecting best 5 features:\n",Malicious_features.shape)

Features Dataframe Shape after selecting best 5 features:
 (1048575, 5)


In [104]:
#function to create feature interactions
def add_interactions(df):
 combos= list(combinations(list(df.columns), 2))
 colnames = list(df.columns) + ['_'.join(x) for x in combos]
#finding interaction level
 poly = PolynomialFeatures(interaction_only=True, include_bias=False)
 df=poly.fit_transform(df)
 df=pd.DataFrame(df)
 df.columns=colnames  
#Removing interaction terms with all 0 values
 noint_indicies=[i for i, x in enumerate(list((df==0).all())) if x]
 df=df.drop(df.columns[noint_indicies], axis=1)
 return df 
#Calling add_interaction function
Malicious_features= add_interactions(Malicious_features)
print("Features Dataframe Shape after developing features interaction:\n",Malicious_features.shape)

Features Dataframe Shape after developing features interaction:
 (1048575, 15)


In [105]:
#Dimentionality reduction
pca = PCA(n_components=10)
Malicious_features = pd.DataFrame(pca.fit_transform(Malicious_features))
print("Features Dataframe Shape after reducing dimentions to 10:\n",Malicious_features.shape)

Features Dataframe Shape after reducing dimentions to 10:
 (1048575, 10)


In [106]:
#creating Training and Testing sets
Malicious_features_train, Malicious_features_test, Malicious_labels_train, Malicious_labels_test = train_test_split(Malicious_features, Malicious_labels, test_size=0.2, random_state=1)


In [107]:
#Encoding Training and Testing labels
le= LabelEncoder()
test= Malicious_labels_train.values
le.fit(test)
Malicious_labels_train=le.transform(test)

le= LabelEncoder()
test= Malicious_labels_test.values
le.fit(test)
Malicious_labels_test=le.transform(test)

In [108]:
#Normalization 
normalize = preprocessing.Normalization()
Malicious_features_train=np.array(Malicious_features_train)
Malicious_features_test=np.array(Malicious_features_test)

#Scalaing the data
scale_1 =prp.MaxAbsScaler()
Malicious_features_train = scale_1.fit_transform(Malicious_features_train)
Malicious_features_test = scale_1.fit_transform(Malicious_features_test)

In [109]:
X=Malicious_features_train
y=Malicious_labels_train
X1=Malicious_features_test
y1=Malicious_labels_test

In [110]:
# Different Machine learning Models
# Logistic Regression
LR = LogisticRegression(random_state=1, max_iter=100, multi_class="multinomial").fit(X, y)
Logistic_Result=LR.predict(X1)
print(classification_report(y1, Logistic_Result))

              precision    recall  f1-score   support

           0       0.94      0.08      0.14       386
           1       1.00      1.00      1.00    209329

    accuracy                           1.00    209715
   macro avg       0.97      0.54      0.57    209715
weighted avg       1.00      1.00      1.00    209715



In [111]:
# Support Vector Machines
SVM = svm.LinearSVC()
SVM.fit(X, y)
SVM_Result=SVM.predict(X1)
print(classification_report(y1, SVM_Result))

              precision    recall  f1-score   support

           0       0.98      0.11      0.20       386
           1       1.00      1.00      1.00    209329

    accuracy                           1.00    209715
   macro avg       0.99      0.55      0.60    209715
weighted avg       1.00      1.00      1.00    209715



In [112]:
# Random Forets
RF = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=1)
RF.fit(X, y)
RandonForest_Result=RF.predict(X1)
print(classification_report(y1, RandonForest_Result))

              precision    recall  f1-score   support

           0       0.83      0.20      0.32       386
           1       1.00      1.00      1.00    209329

    accuracy                           1.00    209715
   macro avg       0.91      0.60      0.66    209715
weighted avg       1.00      1.00      1.00    209715



In [113]:
# Neural Network alpha=1e-5 random_state=1
NN = MLPClassifier(solver='adam', hidden_layer_sizes=(5, 2))
NN.fit(X, y)
Neural_Netwrok_Result=NN.predict(X1)
print(classification_report(y1, Neural_Netwrok_Result))

              precision    recall  f1-score   support

           0       0.95      0.13      0.24       386
           1       1.00      1.00      1.00    209329

    accuracy                           1.00    209715
   macro avg       0.97      0.57      0.62    209715
weighted avg       1.00      1.00      1.00    209715

