In [1]:
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score

In [2]:
# Columns to read from the CSV, in schema order. Every column is parsed as
# float32 unless it appears in the override table below.
_schema_columns = [
    'Src IP', 'Src Port', 'Dst IP', 'Dst Port', 'Protocol', 'Flow Duration',
    'Fwd Pkt Len Max', 'Fwd Pkt Len Min', 'Bwd Pkt Len Max', 'Bwd Pkt Len Min',
    'Flow IAT Mean', 'Flow IAT Std', 'Flow IAT Max', 'Flow IAT Min',
    'Fwd IAT Tot', 'Fwd IAT Mean', 'Fwd IAT Std',
    'Bwd IAT Mean', 'Bwd IAT Std',
    'Fwd Pkts/s', 'Bwd Pkts/s',
    'Pkt Len Min', 'Pkt Len Max', 'Pkt Len Mean', 'Pkt Len Std', 'Pkt Len Var',
    'SYN Flag Cnt', 'RST Flag Cnt', 'PSH Flag Cnt',
    'ACK Flag Cnt', 'ECE Flag Cnt', 'CWE Flag Count',
    'Down/Up Ratio', 'Pkt Size Avg',
    'Init Fwd Win Byts', 'Init Bwd Win Byts',
    'Active Mean', 'Active Std', 'Active Max', 'Active Min',
    'Idle Mean', 'Idle Std', 'Idle Max', 'Idle Min',
    'Label',
]

# Columns that are NOT float32: categoricals, 16-bit ports, 32-bit counters.
_dtype_overrides = {
    **dict.fromkeys(
        ['Src IP', 'Dst IP', 'Protocol',
         'SYN Flag Cnt', 'RST Flag Cnt', 'PSH Flag Cnt',
         'ACK Flag Cnt', 'ECE Flag Cnt', 'CWE Flag Count',
         'Label'],
        'category'),
    **dict.fromkeys(['Src Port', 'Dst Port'], 'uint16'),
    **dict.fromkeys(
        ['Flow Duration', 'Init Fwd Win Byts', 'Init Bwd Win Byts'],
        'uint32'),
}

# Final column -> dtype mapping handed to pd.read_csv.
dtypes = {
    column: _dtype_overrides.get(column, 'float32')
    for column in _schema_columns
}
del _schema_columns, _dtype_overrides

In [3]:
# Location of the dataset CSV. The default is the original hard-coded path;
# set the DDOS_DATASET_PATH environment variable to run the notebook on a
# different machine without editing this cell.
DATA_PATH = os.environ.get("DDOS_DATASET_PATH", "E:\\DataSet\\final_dataset.csv")

# Read only the columns named in `dtypes` and parse each one directly into the
# dtype declared there; columns not listed in the schema are skipped.
df = pd.read_csv(DATA_PATH,
                 dtype=dtypes,
                 usecols=list(dtypes),
                 engine='c',
                 low_memory=True)
# Optional: uncomment to work on a random 70% subsample of the rows.
#df = df.sample(frac=0.7)
# `dtypes` is intentionally left defined: it is a small dict, and deleting it
# made this cell raise NameError when re-run without re-running the schema cell.

In [4]:
# Remove the source and destination address columns from the frame.
df = df.drop(columns=['Src IP', 'Dst IP'])

In [5]:
# Encode the class label numerically: 'ddos' becomes 1 and 'Benign' becomes 0.
# Any label value missing from the mapping is turned into NaN by Series.map.
df['Label'] = df['Label'].map(
    {
        'ddos': 1,
        'Benign': 0,
    }
)

In [6]:
# Convert the protocol, TCP-flag and label columns to unsigned 16-bit
# integers, one column at a time, then drop the temporary column list.
cols = [
    'Protocol',
    'SYN Flag Cnt',
    'ACK Flag Cnt',
    'RST Flag Cnt',
    'PSH Flag Cnt',
    'CWE Flag Count',
    'ECE Flag Cnt',
    'Label',
]
for column in cols:
    df[column] = df[column].astype(np.uint16)
del cols

In [7]:
# Separate the feature columns (X) from the target column (Y), then release
# the combined frame since only X and Y are used from here on.
X = df.drop(columns='Label')
Y = df['Label']
del df

In [8]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split the
# same on every run. The unsplit X and Y are released afterwards.
(X_train, X_test,
 Y_train, Y_test) = train_test_split(X, Y, test_size=0.2, random_state=1)
del X, Y

In [9]:
from sklearn.preprocessing import MinMaxScaler

# Learn the per-feature min/max from the training split only, then apply the
# same fitted scaler to both splits so the test data does not influence it.
scaler = MinMaxScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [10]:
# Turn the label vectors into (n_samples, 1) column arrays.
# np.asarray accepts both the pandas Series produced by the split and an
# ndarray left over from a previous run of this cell, so the cell can be
# re-executed safely (the former `.values` access raised AttributeError on
# the second run because ndarrays have no `.values`).
Y_train = np.asarray(Y_train).reshape(-1, 1)
Y_test = np.asarray(Y_test).reshape(-1, 1)

In [11]:
#RandomForestClassifier
from sklearn.ensemble import RandomForestClassifier

# A random forest is a stochastic learner; without a seed the fitted model and
# the scores printed below change on every run. The seed is fixed here, as it
# already is for the train/test split and the other seeded estimators.
model = RandomForestClassifier(random_state=1)
# The labels are stored as a column vector; fit() expects a flat 1-D array.
model.fit(X_train, np.ravel(Y_train))

In [12]:
# Predict on the held-out split and print the four scores for the forest.
RF_pred = model.predict(X_test)
for caption, scorer in (
    ("Accuracy: ", accuracy_score),
    ("Precision Score: ", precision_score),
    ("Recall Score: ", recall_score),
    ("F1 Score: ", f1_score),
):
    print(caption, scorer(Y_test, RF_pred))

In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic regression trained with the 'sag' solver; the seed is fixed because
# that solver is stochastic. Labels are flattened to 1-D for fit().
logreg = LogisticRegression(
    solver='sag',
    random_state=123,
)
logreg.fit(X_train, np.ravel(Y_train))

In [None]:
# Predict on the held-out split with the logistic-regression model.
LR_pred = logreg.predict(X_test)

# Print the four scores for these predictions.
for caption, scorer in (
    ("Accuracy: ", accuracy_score),
    ("Precision Score: ", precision_score),
    ("Recall Score: ", recall_score),
    ("F1 Score: ", f1_score),
):
    print(caption, scorer(Y_test, LR_pred))

In [None]:
from sklearn.svm import SVC

# Support-vector classifier with a sigmoid kernel and a fixed seed.
# NOTE(review): scikit-learn documents SVC fit time as growing at least
# quadratically with the number of samples - confirm this cell is practical
# on the full training split.
svc = SVC(
    kernel='sigmoid',
    random_state=0,
)
svc.fit(X_train, np.ravel(Y_train))

In [None]:
# Predict on the held-out split with the support-vector classifier.
svc_pred = svc.predict(X_test)

# Print the four scores for these predictions.
for caption, scorer in (
    ("Accuracy: ", accuracy_score),
    ("Precision Score: ", precision_score),
    ("Recall Score: ", recall_score),
    ("F1 Score: ", f1_score),
):
    print(caption, scorer(Y_test, svc_pred))

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier using the library's default settings.
# Labels are flattened to 1-D for fit().
knn = KNeighborsClassifier()
flat_train_labels = np.ravel(Y_train)
knn.fit(X_train, flat_train_labels)

In [None]:
# Predict on the held-out split with the k-nearest-neighbours model.
knn_pred = knn.predict(X_test)

# Print the four scores for these predictions.
for caption, scorer in (
    ("Accuracy: ", accuracy_score),
    ("Precision Score: ", precision_score),
    ("Recall Score: ", recall_score),
    ("F1 Score: ", f1_score),
):
    print(caption, scorer(Y_test, knn_pred))