In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Load the dataset
file_path = "iot5_undersampled.csv"  # Ensure this file exists
df = pd.read_csv(file_path)

# Identify the target column
target_column = 'attack'  # Modify if needed

# Splitting features and target
X = df.drop(columns=[target_column])
y = df[target_column]

# Convert categorical variables to numerical
X = pd.get_dummies(X)

# Fill missing values
X = X.apply(pd.to_numeric, errors='coerce')
X.fillna(X.mean(), inplace=True)

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Define base models
rf_clf = RandomForestClassifier(n_estimators=50, max_depth=5, min_samples_split=10, min_samples_leaf=5, random_state=42, n_jobs=-1)
xgb_clf = XGBClassifier(n_estimators=100, max_depth=5, learning_rate=0.1, subsample=0.8, colsample_bytree=0.8, random_state=42, use_label_encoder=False, eval_metric="logloss")

# Define meta-model
meta_model = LogisticRegression()

# Define Stacking Classifier
stacking_clf = StackingClassifier(
    estimators=[('rf', rf_clf), ('xgb', xgb_clf)],  # Base models
    final_estimator=meta_model,  # Meta-model
    passthrough=True,  # If True, meta-model gets original features + base model predictions
    n_jobs=-1
)

# Train the stacking classifier
stacking_clf.fit(X_train, y_train)

# Evaluate model
train_acc = accuracy_score(y_train, stacking_clf.predict(X_train))
test_acc = accuracy_score(y_test, stacking_clf.predict(X_test))
cv_scores = cross_val_score(stacking_clf, X_train, y_train, cv=5)

print(f"Training Accuracy: {train_acc:.4f}")
print(f"Testing Accuracy: {test_acc:.4f}")
print(f"Cross-Validation Score (mean): {cv_scores.mean():.4f}")


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Training Accuracy: 0.9090
Testing Accuracy: 0.9067
Cross-Validation Score (mean): 0.9504
