# Loading the Preprocessed data

In [1]:
import pandas as pd

df = pd.read_csv('cleaned_IOT.csv')
df

Unnamed: 0,id.orig_p,id.resp_p,flow_duration,fwd_pkts_tot,bwd_pkts_tot,fwd_data_pkts_tot,bwd_data_pkts_tot,fwd_pkts_per_sec,bwd_pkts_per_sec,flow_pkts_per_sec,...,idle.max,idle.tot,idle.avg,idle.std,fwd_init_window_size,bwd_init_window_size,fwd_last_window_size,proto,service,Attack_type
0,0.196030,0.155128,0.221961,0.303454,0.090645,0.079306,0.065246,-0.947016,-0.946793,-0.946908,...,2.959698,0.216062,3.044335,-0.042378,3.069373,2.363506,-0.038155,1,5,3
1,0.853840,0.155128,0.220953,0.303454,0.090645,0.079306,0.065246,-0.947016,-0.946793,-0.946908,...,2.973048,0.217102,3.058048,-0.042378,3.069373,2.363506,-0.038155,1,5,3
2,0.517343,0.155128,0.222846,0.303454,0.090645,0.079306,0.065246,-0.947016,-0.946793,-0.946908,...,2.971658,0.216994,3.056620,-0.042378,3.069373,2.363506,-0.038155,1,5,3
3,1.367919,0.155128,0.221563,0.303454,0.090645,0.079306,0.065246,-0.947016,-0.946793,-0.946908,...,2.979242,0.217584,3.064410,-0.042378,3.069373,2.363506,-0.038155,1,5,3
4,0.850888,0.155128,0.221101,0.303454,0.090645,0.079306,0.065246,-0.947016,-0.946793,-0.946908,...,2.968753,0.216767,3.053636,-0.042378,3.069373,2.363506,-0.038155,1,5,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
117917,1.281132,11.609661,-0.029973,-0.058350,-0.027953,-0.076248,-0.025693,-0.494437,-0.494261,-0.494350,...,-0.187945,-0.029149,-0.188885,-0.042378,-0.276536,-0.276969,0.045468,1,0,9
117918,1.281132,11.850503,-0.029973,-0.058350,-0.027953,-0.076248,-0.025693,-0.556862,-0.556679,-0.556772,...,-0.187945,-0.029149,-0.188885,-0.042378,-0.276536,-0.276969,0.045468,1,0,9
117919,1.281132,11.861128,-0.029973,-0.058350,-0.027953,-0.076248,-0.025693,-0.494437,-0.494261,-0.494350,...,-0.187945,-0.029149,-0.188885,-0.042378,-0.276536,-0.276969,0.045468,1,0,9
117920,1.281132,11.920779,-0.029973,-0.058350,-0.027953,-0.076248,-0.025693,-0.494437,-0.494261,-0.494350,...,-0.187945,-0.029149,-0.188885,-0.042378,-0.276536,-0.276969,0.045468,1,0,9


# Modelling

# Splitting the data into Train and Test

In [2]:
from sklearn.model_selection import train_test_split

X = df.drop(columns=['Attack_type'])
y = df['Attack_type']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Random Forest - SKLearn

In [3]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix

model_rf = RandomForestClassifier(n_estimators=100, random_state=42)
model_rf.fit(X_train, y_train)

y_pred_rf = model_rf.predict(X_test)

accuracy_rf = accuracy_score(y_test, y_pred_rf)
f1_rf = f1_score(y_test, y_pred_rf, average='weighted')

print("Accuracy of Random Forest:", accuracy_rf)
print('F1 Score of Random Forest:', f1_rf)

Accuracy of Random Forest: 0.9980778471888515
F1 Score of Random Forest: 0.9980662301203854


In [4]:
classif_report_rf = classification_report(y_test, y_pred_rf)
print('Classification Report (Random Forest):\n', classif_report_rf)

Classification Report (Random Forest):
               precision    recall  f1-score   support

           0       0.99      0.99      0.99      2285
           1       0.98      0.98      0.98       158
           2       1.00      1.00      1.00     27077
           3       1.00      1.00      1.00      1263
           4       0.91      0.83      0.87        12
           5       1.00      0.75      0.86         8
           6       1.00      1.00      1.00       577
           7       1.00      1.00      1.00       309
           8       0.99      0.98      0.99       760
           9       1.00      1.00      1.00       579
          10       0.99      0.99      0.99      2287
          11       0.98      0.87      0.92        62

    accuracy                           1.00     35377
   macro avg       0.99      0.95      0.97     35377
weighted avg       1.00      1.00      1.00     35377



# XGBoost

In [6]:
import xgboost as xgb
from sklearn.model_selection import GridSearchCV

parameters_xgb = {
    'n_estimators': [100, 200],
    'max_depth': [3, 5],
    'learning_rate': [0.1, 0.3],
}

model_xgb = xgb.XGBClassifier(random_state=42)

grid_search = GridSearchCV(estimator=model_xgb, param_grid=parameters_xgb, cv=3, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
print("Best Parameters:", best_params)

Best Parameters: {'learning_rate': 0.3, 'max_depth': 3, 'n_estimators': 200}


In [7]:
best_xgb = xgb.XGBClassifier(**best_params, random_state=42)
best_xgb.fit(X_train, y_train)

y_pred_xgb = best_xgb.predict(X_test)

accuracy_xgb = accuracy_score(y_test, y_pred_xgb)
f1_xgb = f1_score(y_test, y_pred_xgb, average='weighted')

print("Accuracy (XGBoost):", accuracy_xgb)
print("F1 Score (XGBoost):", f1_xgb)

Accuracy (XGBoost): 0.9981343810950618
F1 Score (XGBoost): 0.9981275520196008


In [8]:
classif_report_xgb = classification_report(y_test, y_pred_xgb)
print('Classification Report (XGBoost):\n', classif_report_xgb)

Classification Report (XGBoost):
               precision    recall  f1-score   support

           0       0.99      0.99      0.99      2285
           1       0.98      1.00      0.99       158
           2       1.00      1.00      1.00     27077
           3       1.00      1.00      1.00      1263
           4       0.83      0.83      0.83        12
           5       1.00      0.75      0.86         8
           6       1.00      1.00      1.00       577
           7       1.00      1.00      1.00       309
           8       0.99      0.98      0.99       760
           9       1.00      1.00      1.00       579
          10       0.99      0.99      0.99      2287
          11       1.00      0.90      0.95        62

    accuracy                           1.00     35377
   macro avg       0.98      0.95      0.97     35377
weighted avg       1.00      1.00      1.00     35377



# Support Vector Machine (SVM)

In [10]:
from sklearn.svm import SVC

model_svm = SVC(random_state=42)
model_svm.fit(X_train, y_train)

y_pred_svm = model_svm.predict(X_test)

accuracy_svm = accuracy_score(y_test, y_pred_svm)
f1_svm = f1_score(y_test, y_pred_svm, average='weighted')

print("Accuracy (SVM):", accuracy_svm)
print("F1 Score (SVM):", f1_svm)

Accuracy (SVM): 0.9938943381292931
F1 Score (SVM): 0.9937198079871747


In [11]:
classif_report_svm = classification_report(y_test, y_pred_svm)
print('Classification Report (SVM):\n', classif_report_svm)

Classification Report (SVM):
               precision    recall  f1-score   support

           0       0.94      0.99      0.96      2285
           1       1.00      0.81      0.90       158
           2       1.00      1.00      1.00     27077
           3       1.00      0.99      1.00      1263
           4       1.00      0.75      0.86        12
           5       0.67      0.75      0.71         8
           6       1.00      1.00      1.00       577
           7       1.00      1.00      1.00       309
           8       0.96      0.98      0.97       760
           9       1.00      1.00      1.00       579
          10       0.98      0.96      0.97      2287
          11       1.00      0.47      0.64        62

    accuracy                           0.99     35377
   macro avg       0.96      0.89      0.92     35377
weighted avg       0.99      0.99      0.99     35377



# SVM - LIBSVM

In [13]:
from libsvm.svmutil import svm_train, svm_predict

X_libsvm = X_train.values.tolist()
X_test_libsvm = X_test.values.tolist()
y_libsvm = y_train.values.tolist()
y_test_libsvm = y_test.values.tolist()

model_libsvm = svm_train(y_libsvm, X_libsvm, '-s 0 -t 0 -c 1')

# Make predictions on the test set
svm_labels, _, _ = svm_predict(y_test_libsvm, X_test_libsvm, model_libsvm)

libsvm_accuracy = accuracy_score(y_test_libsvm, svm_labels)
libsvm_f1 = f1_score(y_test_libsvm, svm_labels, average='weighted')

# Print the accuracy
print('Accuracy (LIBSVM): ', libsvm_accuracy)
print('F1-score (LIBSVM): ', libsvm_f1)

  @jit


Accuracy = 99.4318% (35176/35377) (classification)
Accuracy (LIBSVM):  0.9943183424258699
F1-score (LIBSVM):  0.9942379994603888


In [14]:
classif_report_libsvm = classification_report(y_test_libsvm, svm_labels)
print('Classification Report (LIBSVM):\n', classif_report_libsvm)

Classification Report (LIBSVM):
               precision    recall  f1-score   support

           0       0.95      0.98      0.97      2285
           1       1.00      0.81      0.90       158
           2       1.00      1.00      1.00     27077
           3       1.00      1.00      1.00      1263
           4       0.82      0.75      0.78        12
           5       1.00      0.75      0.86         8
           6       1.00      1.00      1.00       577
           7       1.00      1.00      1.00       309
           8       0.96      0.98      0.97       760
           9       1.00      1.00      1.00       579
          10       0.97      0.97      0.97      2287
          11       1.00      0.68      0.81        62

    accuracy                           0.99     35377
   macro avg       0.98      0.91      0.94     35377
weighted avg       0.99      0.99      0.99     35377

