In [47]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from xgboost import XGBClassifier

In [48]:
df = pd.read_csv('data.csv')
df.head()

Unnamed: 0,Protocol,Flow Duration,Total Fwd Packets,Total Backward Packets,Fwd Packets Length Total,Bwd Packets Length Total,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,...,Fwd Seg Size Min,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,17,48,2,0,2944.0,0.0,1472.0,1472.0,1472.0,0.0,...,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,DrDoS_DNS
1,17,2,2,0,2944.0,0.0,1472.0,1472.0,1472.0,0.0,...,-1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,DrDoS_DNS
2,17,1,2,0,2944.0,0.0,1472.0,1472.0,1472.0,0.0,...,-1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,DrDoS_DNS
3,17,1,2,0,2944.0,0.0,1472.0,1472.0,1472.0,0.0,...,1480,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,DrDoS_DNS
4,17,1,2,0,2896.0,0.0,1448.0,1448.0,1448.0,0.0,...,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,DrDoS_DNS


In [50]:
df.columns

Index(['Protocol', 'Flow Duration', 'Total Fwd Packets',
       'Total Backward Packets', 'Fwd Packets Length Total',
       'Bwd Packets Length Total', 'Fwd Packet Length Max',
       'Fwd Packet Length Min', 'Fwd Packet Length Mean',
       'Fwd Packet Length Std', 'Bwd Packet Length Max',
       'Bwd Packet Length Min', 'Bwd Packet Length Mean',
       'Bwd Packet Length Std', 'Flow Bytes/s', 'Flow Packets/s',
       'Flow IAT Mean', 'Flow IAT Std', 'Flow IAT Max', 'Flow IAT Min',
       'Fwd IAT Total', 'Fwd IAT Mean', 'Fwd IAT Std', 'Fwd IAT Max',
       'Fwd IAT Min', 'Bwd IAT Total', 'Bwd IAT Mean', 'Bwd IAT Std',
       'Bwd IAT Max', 'Bwd IAT Min', 'Fwd PSH Flags', 'Bwd PSH Flags',
       'Fwd URG Flags', 'Bwd URG Flags', 'Fwd Header Length',
       'Bwd Header Length', 'Fwd Packets/s', 'Bwd Packets/s',
       'Packet Length Min', 'Packet Length Max', 'Packet Length Mean',
       'Packet Length Std', 'Packet Length Variance', 'FIN Flag Count',
       'SYN Flag Count', 'RST Fla

In [51]:
df["Label"].value_counts()

Label
DrDoS_NTP        121368
TFTP              98917
Benign            97831
Syn               49373
UDP               18090
DrDoS_UDP         10420
UDP-lag            8872
MSSQL              8523
DrDoS_MSSQL        6212
DrDoS_DNS          3669
DrDoS_SNMP         2717
LDAP               1906
DrDoS_LDAP         1440
Portmap             685
NetBIOS             644
DrDoS_NetBIOS       598
UDPLag               55
WebDDoS              51
Name: count, dtype: int64

In [37]:
df["Label"].str.contains("DrDoS").value_counts()

Label
False    284947
True     146424
Name: count, dtype: int64

In [38]:
def get_label(x):
    if "DrDoS" in x:
        return 1
    else:
        return 0

df["Label"] = df["Label"].map(get_label)
df["Label"].value_counts()

Label
0    284947
1    146424
Name: count, dtype: int64

In [39]:
X = df.drop("Label", axis=1)
y = df["Label"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [40]:
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)
clf.score(X_test, y_test)


0.9451173572877427

In [41]:
print(classification_report(y_test, clf.predict(X_test)))

              precision    recall  f1-score   support

           0       0.96      0.96      0.96     56979
           1       0.92      0.91      0.92     29296

    accuracy                           0.95     86275
   macro avg       0.94      0.94      0.94     86275
weighted avg       0.94      0.95      0.95     86275



In [42]:
model = XGBClassifier()
model.fit(X_train, y_train, verbose=True)

In [43]:
y_pred = model.predict(X_test)
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.96      0.98      0.97     56979
           1       0.95      0.92      0.93     29296

    accuracy                           0.96     86275
   macro avg       0.96      0.95      0.95     86275
weighted avg       0.96      0.96      0.96     86275



In [44]:
model.save_model("xgboost_model.json")

In [46]:
model = XGBClassifier()  
model.load_model('xgboost_model.json')

In [67]:
packet = {'Protocol': 17,
 'Flow Duration': 2,
 'Total Fwd Packets': 2,
 'Total Backward Packets': 0,
 'Fwd Packets Length Total': 2944.0,
 'Bwd Packets Length Total': 0.0,
 'Fwd Packet Length Max': 1472.0,
 'Fwd Packet Length Min': 1472.0,
 'Fwd Packet Length Mean': 1472.0,
 'Fwd Packet Length Std': 0.0,
 'Bwd Packet Length Max': 0.0,
 'Bwd Packet Length Min': 0.0,
 'Bwd Packet Length Mean': 0.0,
 'Bwd Packet Length Std': 0.0,
 'Flow Bytes/s': 61333333.33333333,
 'Flow Packets/s': 41666.66666666666,
 'Flow IAT Mean': 48.0,
 'Flow IAT Std': 0.0,
 'Flow IAT Max': 48.0,
 'Flow IAT Min': 48.0,
 'Fwd IAT Total': 48.0,
 'Fwd IAT Mean': 48.0,
 'Fwd IAT Std': 0.0,
 'Fwd IAT Max': 48.0,
 'Fwd IAT Min': 48.0,
 'Bwd IAT Total': 0.0,
 'Bwd IAT Mean': 0.0,
 'Bwd IAT Std': 0.0,
 'Bwd IAT Max': 0.0,
 'Bwd IAT Min': 0.0,
 'Fwd PSH Flags': 0,
 'Bwd PSH Flags': 0,
 'Fwd URG Flags': 0,
 'Bwd URG Flags': 0,
 'Fwd Header Length': 0,
 'Bwd Header Length': 0,
 'Fwd Packets/s': 41666.668,
 'Bwd Packets/s': 0.0,
 'Packet Length Min': 1472.0,
 'Packet Length Max': 1472.0,
 'Packet Length Mean': 1472.0,
 'Packet Length Std': 0.0,
 'Packet Length Variance': 0.0,
 'FIN Flag Count': 0,
 'SYN Flag Count': 0,
 'RST Flag Count': 0,
 'PSH Flag Count': 0,
 'ACK Flag Count': 0,
 'URG Flag Count': 0,
 'CWE Flag Count': 0,
 'ECE Flag Count': 0,
 'Down/Up Ratio': 0.0,
 'Avg Packet Size': 2208.0,
 'Avg Fwd Segment Size': 1472.0,
 'Avg Bwd Segment Size': 0.0,
 'Fwd Avg Bytes/Bulk': 0,
 'Fwd Avg Packets/Bulk': 0,
 'Fwd Avg Bulk Rate': 0,
 'Bwd Avg Bytes/Bulk': 0,
 'Bwd Avg Packets/Bulk': 0,
 'Bwd Avg Bulk Rate': 0,
 'Subflow Fwd Packets': 2,
 'Subflow Fwd Bytes': 2944,
 'Subflow Bwd Packets': 0,
 'Subflow Bwd Bytes': 0,
 'Init Fwd Win Bytes': -1,
 'Init Bwd Win Bytes': -1,
 'Fwd Act Data Packets': 1,
 'Fwd Seg Size Min': 0,
 'Active Mean': 0.0,
 'Active Std': 0.0,
 'Active Max': 0.0,
 'Active Min': 0.0,
 'Idle Mean': 0.0,
 'Idle Std': 0.0,
 'Idle Max': 0.0,
 'Idle Min': 0.0}

In [69]:
packet = pd.DataFrame(packet, index=[0])
model.predict(packet)

array([1])