In [4]:
from autogluon.tabular import TabularDataset, TabularPredictor
import pandas as pd
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight


PROJECT_ROOT = os.path.abspath("..")
train_path = os.path.join(PROJECT_ROOT, "data", "unsw-nb15","raw", "UNSW_NB15_training-set.csv")
test_path = os.path.join(PROJECT_ROOT, "data", "unsw-nb15","raw", "UNSW_NB15_testing-set.csv")

train_df = pd.read_csv(train_path)
test_df  = pd.read_csv(test_path)

DROP_COLS = ["id", "label"]
train_df = train_df.drop(columns=DROP_COLS, errors="ignore")
test_df  = test_df.drop(columns=DROP_COLS, errors="ignore")

full_df = pd.concat([train_df, test_df], ignore_index=True)

train_df, test_df = train_test_split(
    full_df,
    test_size=0.2,
    stratify=full_df["attack_cat"],
    random_state=42
)

LABEL = "attack_cat"

# Compute class weights
classes = np.unique(train_df[LABEL])
weights = compute_class_weight(
    class_weight="balanced",
    classes=classes,
    y=train_df[LABEL]
)
class_weights = dict(zip(classes, weights))
print(class_weights)

# Create sample weights for training data only
train_df['sample_weight'] = train_df[LABEL].map(class_weights)

predictor = TabularPredictor(
    label="attack_cat",
    problem_type="multiclass",
    eval_metric="accuracy",
    sample_weight='sample_weight'
)

# Force GPU usage with proper configuration
predictor.fit(
    train_data=train_df,
    hyperparameters = {
    "NN_TORCH": [
        {
            "num_epochs": 50,
            "hidden_size": 256,
            "dropout_prob": 0.3
        },
        {
            "num_epochs": 80,
            "hidden_size": 512,
            "dropout_prob": 0.4
        },
        {
            "num_epochs": 100,
            "hidden_size": 1024,
            "dropout_prob": 0.5
        }
    ]
},
    time_limit=7200,
    num_stack_levels=0,  # Disable stacking completely for faster training
    num_bag_folds=5,
    ag_args_fit={
        "num_gpus": 1  # Global GPU setting
    }
)

# Evaluate on test set
results = predictor.evaluate(test_df)
print("\nTest Results:")
print(results)

# Get predictions
y_pred = predictor.predict(test_df)
from sklearn.metrics import classification_report

print("\nClassification Report:")
print(classification_report(test_df[LABEL], y_pred))

No path specified. Models will be saved in: "AutogluonModels/ag-20251224_131436"
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.5.0
Python Version:     3.10.19
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #89~22.04.2-Ubuntu SMP PREEMPT_DYNAMIC Wed Oct 29 10:45:25 UTC 2
CPU Count:          32
Pytorch Version:    2.9.1+cu128
CUDA Version:       12.8
GPU Memory:         GPU 0: 47.52/47.53 GB | GPU 1: 47.52/47.52 GB
Total GPU Memory:   Free: 95.04 GB, Allocated: 0.01 GB, Total: 95.05 GB
GPU Count:          2
Memory Avail:       18.97 GB / 125.59 GB (15.1%)
Disk Space Avail:   1279.91 GB / 11087.69 GB (11.5%)
No presets specified! To achieve strong results with AutoGluon, it is recommended to use the available presets. Defaulting to `'medium'`...
	Recommended Presets (For more details refer to https://auto.gluon.ai/stable/tutorials/tabular/tabular-essentials.html#presets):
	presets='extreme'  : New in v1.5: The state-of-the-art for tabular data. Massively 

{'Analysis': np.float64(9.623622782446311), 'Backdoor': np.float64(11.06484165324745), 'DoS': np.float64(1.5757376547928452), 'Exploits': np.float64(0.5787142055025267), 'Fuzzers': np.float64(1.0627313502087952), 'Generic': np.float64(0.4376881754676519), 'Normal': np.float64(0.2770672043010753), 'Reconnaissance': np.float64(1.8423272857270534), 'Shellcode': np.float64(17.050289495450787), 'Worms': np.float64(148.30071942446042)}


Values in column 'sample_weight' used as sample weights instead of predictive features. Evaluation metrics will ignore sample weights, specify weight_evaluation=True to instead report weighted metrics.
Beginning AutoGluon training ... Time limit = 7200s
AutoGluon will save models to "/home/e20094/e20-4yp-backdoor-resilient-federated-nids/notebooks/AutogluonModels/ag-20251224_131436"
Train Data Rows:    206138
Train Data Columns: 43
Label Column:       attack_cat
Problem Type:       multiclass
Preprocessing data ...
Train Data Class Count: 10
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    18387.93 MB
	Train Data (Original)  Memory Usage: 96.59 MB (0.5% of available memory)
	Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
	Stage 1 Generators:
		Fitting AsTypeFeatureGenerator...
			Note: Converting 1 features to boolea


Test Results:
{'accuracy': 0.8250121276802174, 'balanced_accuracy': np.float64(0.5009610944711875), 'mcc': 0.7758936322853301}

Classification Report:
                precision    recall  f1-score   support

      Analysis       0.94      0.05      0.10       535
      Backdoor       0.68      0.08      0.15       466
           DoS       0.70      0.05      0.10      3271
      Exploits       0.61      0.94      0.74      8905
       Fuzzers       0.70      0.53      0.60      4849
       Generic       0.99      0.98      0.99     11774
        Normal       0.90      0.94      0.92     18600
Reconnaissance       0.91      0.76      0.82      2798
     Shellcode       0.57      0.56      0.57       302
         Worms       0.80      0.11      0.20        35

      accuracy                           0.83     51535
     macro avg       0.78      0.50      0.52     51535
  weighted avg       0.84      0.83      0.80     51535



In [5]:
from autogluon.tabular import TabularPredictor
predictor = TabularPredictor.load("/home/e20094/e20-4yp-backdoor-resilient-federated-nids/notebooks/AutogluonModels/ag-20251224_121820")
print(predictor.feature_metadata)
# This will confirm exactly which columns were treated as what type

('category', [])  :  3 | ['proto', 'service', 'state']
('float', [])     : 11 | ['dur', 'rate', 'sload', 'dload', 'sinpkt', ...]
('int', [])       : 26 | ['spkts', 'dpkts', 'sbytes', 'dbytes', 'sttl', ...]
('int', ['bool']) :  1 | ['is_sm_ips_ports']


In [7]:
from autogluon.tabular import TabularPredictor
import pprint

# 1. Load the AutoGluon Predictor
save_path = "/home/e20094/e20-4yp-backdoor-resilient-federated-nids/notebooks/AutogluonModels/ag-20251224_121820"
predictor = TabularPredictor.load(save_path)

print("=== 1. Best Model Info ===")
# FIX: Use .model_best (property) instead of .get_model_best()
best_model_name = predictor.model_best
print(f"Best Model: {best_model_name}")

# 2. Inspect the Neural Network specifically
model_names = predictor.model_names() # Updated method name
nn_models = [m for m in model_names if "NeuralNetTorch" in m]

if nn_models:
    nn_name = nn_models[0]
    print(f"\n=== 2. Analyzing: {nn_name} ===")
    
    # Get model info / hyperparameters
    model_info = predictor.model_info(nn_name)
    
    print("\n--- Hyperparameters ---")
    # We specifically want to see hidden_size, activation, etc.
    pprint.pprint(model_info.get('hyperparameters', {}))
    
    print("\n--- Model Arguments (Architecture) ---")
    # Sometimes architecture details are in 'ag_args_fit' or root keys
    pprint.pprint(model_info.get('ag_args_fit', {}))

    print("\n=== 3. Feature Metadata ===")
    # This tells us exactly which features are float vs int vs category
    print(predictor.feature_metadata)
    
    print("\n=== 4. Exact Input Feature List ===")
    # These are the columns expected by the model after dropping useless ones
    # In 1.5.0 we can often access this via feature_generator
    if hasattr(predictor, 'feature_generator'):
        print(predictor.feature_generator.features_in)
    else:
        print("Feature generator not directly accessible, relying on metadata above.")

else:
    print("No NeuralNetTorch model found in the list.")
    print("Available models:", model_names)

=== 1. Best Model Info ===
Best Model: WeightedEnsemble_L2

=== 2. Analyzing: NeuralNetTorch_BAG_L1 ===

--- Hyperparameters ---
{'bin': 'auto',
 'max_base_models': 0,
 'max_base_models_per_type': 'auto',
 'model_random_seed': 0,
 'n_bins': None,
 'save_bag_folds': True,
 'stratify': 'auto',
 'use_orig_features': True,
 'valid_stacker': True,
 'vary_seed_across_folds': False}

--- Model Arguments (Architecture) ---
{'drop_unique': False,
 'get_features_kwargs': None,
 'get_features_kwargs_extra': None,
 'ignored_type_group_raw': None,
 'ignored_type_group_special': None,
 'max_memory_usage_ratio': 1.0,
 'max_time_limit': None,
 'max_time_limit_ratio': 1.0,
 'min_time_limit': 0,
 'predict_1_batch_size': None,
 'temperature_scalar': None,
 'valid_raw_types': None,
 'valid_special_types': None}

=== 3. Feature Metadata ===
('category', [])  :  3 | ['proto', 'service', 'state']
('float', [])     : 11 | ['dur', 'rate', 'sload', 'dload', 'sinpkt', ...]
('int', [])       : 26 | ['spkts', 'dpk