# ML for DoS Detection using k-NN and Random Forest

### Import Statements

In [1]:
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

### Read Data for Detection

In [10]:
# Load the labelled samples; the "Attack" column is the target and every
# remaining column is used as a feature.
data = pd.read_csv("detect_data.csv")
y = data["Attack"]
X = data.drop(columns=["Attack"])
X.head()

Unnamed: 0,SYN-ACK_Ratio,ICMP_Count,DNS_Ratio
0,0.007076,26,0.00278
1,0.007714,18,0.001175
2,0.024967,23,0.00139
3,0.025346,21,0.001112
4,0.011057,20,0.00109


### Split into Training and Test Data

In [11]:
# Hold out a test split; the fixed seed keeps the partition reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
)

### Preprocessing Step

In [12]:
# Learn the scaling statistics from the training split only, then apply that
# same fitted scaler to both splits so the test data never influences the fit.
# NOTE: X_train / X_test are overwritten with their scaled versions here.
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

### Train the k-NN model

In [32]:
# Fit a 5-nearest-neighbours classifier on the scaled training split and
# display its accuracy on the held-out split.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
knn.score(X_test, y_test)

0.9902912621359223

### Confusion Matrix (k-NN)

In [14]:
# Rows are the true classes, columns the classes predicted by k-NN.
confusion_matrix(y_true=y_test, y_pred=knn.predict(X_test))

array([[40,  0,  0,  0],
       [ 0, 11,  0,  0],
       [ 0,  0, 11,  0],
       [ 1,  0,  0, 40]])

### Train the Random Forest model

In [15]:
# Fit a 20-tree random forest (seeded for reproducibility) on the scaled
# training split and display its accuracy on the held-out split.
rf = RandomForestClassifier(n_estimators=20, random_state=0).fit(X_train, y_train)
rf.score(X_test, y_test)

0.9902912621359223

### Confusion Matrix (Random Forest)

In [16]:
# Rows are the true classes, columns the classes predicted by the forest.
confusion_matrix(y_true=y_test, y_pred=rf.predict(X_test))

array([[40,  0,  0,  0],
       [ 0, 11,  0,  0],
       [ 0,  0, 11,  0],
       [ 1,  0,  0, 40]])

### Export Model

In [17]:
from joblib import dump, load

# The forest was trained on standardized features, so the fitted scaler is
# persisted alongside it: anyone loading the model needs the same scaler to
# transform raw feature values before calling predict().
dump(sc, "scaler.model")
# Kept as the last expression so the cell still displays ['model.model'].
dump(rf, "model.model")

['model.model']

In [31]:
# rf was fitted on StandardScaler output, so a raw sample has to go through
# the same fitted scaler first; feeding unscaled values compares them against
# split thresholds learned in standardized space.
# The sample is wrapped in a DataFrame with the training column names so the
# scaler receives input in the same form it was fitted on.
sample = pd.DataFrame([[0.74, 0, 0.012]], columns=X.columns)
rf.predict(sc.transform(sample))

array([2])