In [11]:
import pandas as pd
import joblib  # for loading .pkl model files
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report

# Load the pre-trained LightGBM model
model = joblib.load('best_re-trained_lgb.pkl')

# Load your dataset with ',' as the separator
file_path = 'combined_captured_data_MQTT_final.csv'  # Update this with your dataset path
df = pd.read_csv(file_path, sep=',')

# Adjust the feature list to match the actual column names in your dataset
expected_features = [
    'tcp_flags', 'tcp_time_delta', 'tcp_len', 'mqtt_conack_flags', 
    'mqtt_conflag_cleansess', 'mqtt_conflags', 'mqtt_dupflag', 
    'mqtt_hdrflags', 'mqtt_kalive', 'mqtt_msg', 'mqtt_qos'
]

# Check if the expected features exist in the dataset
if all(feature in df.columns for feature in expected_features):
    # Separate features (X) and target (y)
    X = df[expected_features]  # Use only the specified features
    y_true = df['target']  # The target column is 'target'

    # Scale the features using StandardScaler
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Mapping for label encoding and decoding
    label_mapping = {
        "legitimate": 0,
        "dos": 1,
        "slowite": 2,
        "malformed": 3,
        "bruteforce": 4,
        "flood": 5
    }

    # Reverse mapping to convert predictions back to string labels
    reverse_label_mapping = {v: k for k, v in label_mapping.items()}

    # Convert true labels to string format using reverse mapping
    y_true = y_true.map(reverse_label_mapping)

    # Make predictions using the loaded model
    y_pred_numeric = model.predict(X_scaled)

    # Convert numeric predictions back to string labels
    y_pred = [reverse_label_mapping[pred] for pred in y_pred_numeric]

    # Generate the classification report
    report = classification_report(y_true, y_pred, target_names=label_mapping.keys(), zero_division=1)

    # Print the classification report
    print(report)
else:
    missing_features = [feature for feature in expected_features if feature not in df.columns]
    print(f"The following required features are missing from your dataset: {missing_features}")


              precision    recall  f1-score   support

  legitimate       0.70      0.89      0.79    105528
         dos       0.94      0.94      0.94    105528
     slowite       1.00      0.44      0.61    105528
   malformed       0.34      0.39      0.36    105528
  bruteforce       0.81      0.90      0.85    105528
       flood       0.94      0.98      0.96    105528

    accuracy                           0.76    633168
   macro avg       0.79      0.76      0.75    633168
weighted avg       0.79      0.76      0.75    633168



In [12]:
import pandas as pd
import joblib  # for loading .pkl model files
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report

# Load the pre-trained LightGBM model
model = joblib.load('best_lgb.pkl')

# Load your dataset with ',' as the separator
file_path = 'combined_captured_data_MQTT_final.csv'  # Update this with your dataset path
df = pd.read_csv(file_path, sep=',')

# Adjust the feature list to match the actual column names in your dataset
expected_features = [
    'tcp_flags', 'tcp_time_delta', 'tcp_len', 'mqtt_conack_flags', 
    'mqtt_conflag_cleansess', 'mqtt_conflags', 'mqtt_dupflag', 
    'mqtt_hdrflags', 'mqtt_kalive', 'mqtt_msg', 'mqtt_qos'
]

# Check if the expected features exist in the dataset
if all(feature in df.columns for feature in expected_features):
    # Separate features (X) and target (y)
    X = df[expected_features]  # Use only the specified features
    y_true = df['target']  # The target column is 'target'

    # Scale the features using StandardScaler
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Mapping for label encoding and decoding
    label_mapping = {
        "legitimate": 0,
        "dos": 1,
        "slowite": 2,
        "malformed": 3,
        "bruteforce": 4,
        "flood": 5
    }

    # Reverse mapping to convert predictions back to string labels
    reverse_label_mapping = {v: k for k, v in label_mapping.items()}

    # Convert true labels to string format using reverse mapping
    y_true = y_true.map(reverse_label_mapping)

    # Make predictions using the loaded model
    y_pred_numeric = model.predict(X_scaled)

    # Convert numeric predictions back to string labels
    y_pred = [reverse_label_mapping[pred] for pred in y_pred_numeric]

    # Generate the classification report
    report = classification_report(y_true, y_pred, target_names=label_mapping.keys(), zero_division=1)

    # Print the classification report
    print(report)
else:
    missing_features = [feature for feature in expected_features if feature not in df.columns]
    print(f"The following required features are missing from your dataset: {missing_features}")


              precision    recall  f1-score   support

  legitimate       0.61      0.49      0.54    105528
         dos       0.24      0.53      0.33    105528
     slowite       0.20      0.02      0.03    105528
   malformed       0.13      0.33      0.19    105528
  bruteforce       1.00      0.00      0.00    105528
       flood       0.05      0.03      0.03    105528

    accuracy                           0.23    633168
   macro avg       0.37      0.23      0.19    633168
weighted avg       0.37      0.23      0.19    633168

