# Model Training for Intrusion Detection System

This notebook trains and compares multiple ML models (Random Forest, SVM, Neural Network) for intrusion detection.


In [None]:
import sys
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

# Add the parent of the working directory to the import path so that the
# ``src`` package imported below can be resolved (assumes ``src`` lives next to
# the ``data``/``models`` folders referenced as ``../...`` later on).
# ``os.path.abspath('')`` is already the current working directory, so a single
# ``dirname`` reaches its parent; a second one would overshoot to the grandparent.
project_root = os.path.dirname(os.path.abspath(''))
# Guard against piling up duplicate entries when the cell is re-run.
if project_root not in sys.path:
    sys.path.append(project_root)

from src.preprocessing import DataPreprocessor
from src.models import RandomForestIDS, SVMIDS, NeuralNetworkIDS
from src.evaluation import ModelEvaluator, evaluate_model
from src.visualization import IDSVisualizer

print("Libraries imported successfully!")


## 1. Load and Preprocess Data


In [None]:
# Create the preprocessor used for loading, cleaning, feature extraction and
# the train/test split below.
preprocessor = DataPreprocessor()

# Load data
data_path = '../data/raw/sample_data.csv'  # Update with your dataset path
df = preprocessor.load_data(data_path)

# A ``None`` result is treated as "dataset not available": generate the sample
# dataset once and retry the load.
if df is None:
    print("Creating sample data...")
    from src.preprocessing import main as create_sample
    create_sample()
    df = preprocessor.load_data(data_path)

# If the retry also produced nothing, stop here with a clear message instead of
# passing ``None`` into the cleaning step and failing somewhere less obvious.
if df is None:
    raise FileNotFoundError(
        f"Could not load dataset from {data_path!r}, "
        "even after attempting to create sample data."
    )

# Clean and extract features
df_cleaned = preprocessor.clean_data(df)
df_features = preprocessor.extract_features(df_cleaned)

# Prepare training and test sets (20% held out, fixed seed)
X_train, X_test, y_train, y_test = preprocessor.prepare_data(
    df_features, test_size=0.2, random_state=42
)

print(f"Training set: {X_train.shape}")
print(f"Test set: {X_test.shape}")
print(f"Number of classes: {len(np.unique(y_train))}")


## 2. Train Random Forest Model


In [None]:
# Build the Random Forest wrapper (n_estimators=100, fixed seed) and fit it on
# the training split.
rf_model = RandomForestIDS(n_estimators=100, random_state=42)
rf_model.train(X_train, y_train)

# Class labels for the evaluation output: taken from the label encoder when it
# exposes ``classes_``, otherwise None.
rf_class_names = getattr(preprocessor.label_encoder, 'classes_', None)

# Score the fitted model on the held-out test split.
rf_metrics, rf_evaluator = evaluate_model(
    rf_model,
    X_test,
    y_test,
    model_name="Random Forest",
    class_names=rf_class_names,
)

# Persist the fitted model.
rf_model.save('../models/random_forest_model.pkl')


## 3. Train SVM Model


In [None]:
# Build the SVM wrapper (RBF kernel, C=1.0, fixed seed) and fit it on the
# training split.
svm_model = SVMIDS(kernel='rbf', C=1.0, random_state=42)
svm_model.train(X_train, y_train)

# Class labels for the evaluation output: taken from the label encoder when it
# exposes ``classes_``, otherwise None.
svm_class_names = getattr(preprocessor.label_encoder, 'classes_', None)

# Score the fitted model on the held-out test split.
svm_metrics, svm_evaluator = evaluate_model(
    svm_model,
    X_test,
    y_test,
    model_name="SVM",
    class_names=svm_class_names,
)

# Persist the fitted model.
svm_model.save('../models/svm_model.pkl')


## 4. Train Neural Network Model


In [None]:
# Carve a validation subset (20%, fixed seed) out of the training split; the
# test split stays untouched for the final evaluation.
from sklearn.model_selection import train_test_split
X_train_nn, X_val_nn, y_train_nn, y_val_nn = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=42,
)

# Build the network wrapper with two hidden layers (128 and 64 units) and a
# dropout rate of 0.3, then train it with the validation subset supplied.
nn_model = NeuralNetworkIDS(hidden_layers=[128, 64], dropout_rate=0.3)
nn_model.train(X_train_nn, y_train_nn, X_val_nn, y_val_nn, epochs=50, batch_size=32)

# Plot the training curves only when the model recorded a (truthy) history.
if nn_model.history:
    visualizer = IDSVisualizer()
    visualizer.plot_training_history(nn_model.history)

# Class labels for the evaluation output: taken from the label encoder when it
# exposes ``classes_``, otherwise None.
nn_class_names = getattr(preprocessor.label_encoder, 'classes_', None)

# Score the trained model on the held-out test split.
nn_metrics, nn_evaluator = evaluate_model(
    nn_model,
    X_test,
    y_test,
    model_name="Neural Network",
    class_names=nn_class_names,
)

# Persist the trained model.
nn_model.save('../models/neural_network_model.h5')


## 5. Compare All Models


In [None]:
# Map each model's display name to the metrics produced by its evaluation cell
# (insertion order: Random Forest, SVM, Neural Network).
model_names = ('Random Forest', 'SVM', 'Neural Network')
model_metrics = (rf_metrics, svm_metrics, nn_metrics)
results = dict(zip(model_names, model_metrics))

# Tabulate the metrics; the transpose puts one model per row.
comparison_df = pd.DataFrame(results).T
print("Model Comparison:")
print(comparison_df)

# Draw the side-by-side performance chart.
visualizer = IDSVisualizer()
visualizer.plot_model_performance_comparison(results)

# Write the table next to the saved models and report where it went.
comparison_path = '../models/model_comparison.csv'
comparison_df.to_csv(comparison_path)
print("\nComparison saved to ../models/model_comparison.csv")
