In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFECV
from sklearn.metrics import  auc, classification_report, confusion_matrix, accuracy_score, roc_curve
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler

In [None]:
# Read the raw CSV into a DataFrame; the path is relative to the kernel's
# working directory.
csv_path = 'BoTNeTIoT-L01-v2.csv'
df = pd.read_csv(csv_path)

In [None]:
# Drop these three columns before modelling. errors='ignore' skips any that
# are already absent, so re-running this cell does not raise.
cols_to_drop = ['Device_Name', 'Attack', 'Attack_subType']
df = df.drop(columns=cols_to_drop, errors='ignore')

In [None]:
# Split the frame into the target column ('label') and the remaining
# predictor columns.
y = df['label']
X = df.drop(columns='label')

In [None]:
# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [None]:
# Standardise the features. The scaler learns its statistics from the training
# split only and is then applied unchanged to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Recursive feature elimination with cross-validation: a 10-tree random forest
# ranks the features, one feature is removed per step, and each candidate
# subset is scored by accuracy over 5 stratified folds.
rf_params = dict(n_estimators=10, random_state=42, n_jobs=-1, verbose=1)
estimator = RandomForestClassifier(**rf_params)
skf = StratifiedKFold(n_splits=5)
rfecv = RFECV(estimator=estimator, step=1, cv=skf, scoring='accuracy', n_jobs=-1)
rfecv.fit(X_train, y_train)

# Reduced copies of both splits containing only the columns RFECV kept.
X_train_sel, X_test_sel = (rfecv.transform(split) for split in (X_train, X_test))

In [None]:
# Report how many features the fitted selector kept.
n_selected = rfecv.n_features_
print("Optimal number of features:", n_selected)

In [None]:
# Evaluate the RFECV-selected random forest on the held-out split.
#
# RFECV.predict reduces its input to the selected features itself before
# calling the underlying estimator, so it must receive the full-width scaled
# matrix (X_test). Passing the already-reduced X_test_sel fails the selector's
# feature-count validation whenever at least one feature was eliminated.
y_pred_rf = rfecv.predict(X_test)
print("Random Forest Classification Report:\n", classification_report(y_test, y_pred_rf))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_rf))

In [None]:
# ROC curve for the RFECV-selected random forest on the held-out split.
#
# RFECV.predict_proba applies the feature selection internally, so it is given
# the full-width scaled matrix (X_test). Passing the already-reduced X_test_sel
# fails the selector's feature-count validation whenever at least one feature
# was eliminated.
# Column 1 of predict_proba is the probability of the second entry in the
# classifier's classes_ (assumes a binary target -- TODO confirm for this data).
rf_probs = rfecv.predict_proba(X_test)[:, 1]
fpr_rf, tpr_rf, _ = roc_curve(y_test, rf_probs)
auc_rf = auc(fpr_rf, tpr_rf)

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(fpr_rf, tpr_rf, label=f'Random Forest (AUC = {auc_rf:.2f})')
# Diagonal reference line: the ROC of a classifier that guesses at random.
ax.plot([0, 1], [0, 1], 'k--', lw=2)
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('ROC Curve Comparison')
ax.legend(loc='lower right')
ax.grid(True)
plt.show()