# Network Threat Detection System - Model Evaluation

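This notebook trains the threat detector on labelled normal and attack traffic, then evaluates it on a held-out test split: summary performance metrics, the ROC curve, feature importances, and a confusion matrix with statistics on the misclassified samples.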

In [None]:
import sys
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import roc_curve, auc, precision_recall_curve, confusion_matrix
import yaml

# Add the project root (the parent of the current working directory) to the
# Python path. The membership check keeps the cell idempotent: re-running it
# does not append the same entry to sys.path again.
project_root = Path.cwd().parent
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

from src.threat_detection import ThreatDetector
from src.evaluation import ModelEvaluator

## 1. Load Configuration and Data

In [None]:
# Load configuration. The file is read as UTF-8 explicitly so the result does
# not depend on the platform's default text encoding.
with open(project_root / 'config' / 'config.yml', 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# Load datasets: one CSV of normal traffic plus one CSV per attack type.
data_dir = project_root / 'data'
attack_types = ['ddos', 'port_scan', 'data_exfiltration']
normal_traffic = pd.read_csv(data_dir / 'normal_traffic.csv')
attack_traffic = pd.concat(
    [pd.read_csv(data_dir / f'{attack}_traffic.csv') for attack in attack_types],
    ignore_index=True,
)

# Prepare features and labels: 0 marks normal rows, 1 marks attack rows, in
# the same order the frames are stacked.
# ignore_index=True gives every row of X a unique label. Without it each CSV
# contributes its own 0..n-1 row labels, so X would carry duplicate index
# values and label-based selection would be ambiguous.
X = pd.concat([normal_traffic, attack_traffic], ignore_index=True)
y = np.concatenate([np.zeros(len(normal_traffic)), np.ones(len(attack_traffic))])

## 2. Initialize Models and Evaluator

In [None]:
# Initialize models
detector = ThreatDetector(config['models'])
evaluator = ModelEvaluator(config)

# Split data: hold out 20% of the rows for testing, with a fixed seed so the
# split is reproducible. stratify=y keeps the normal/attack ratio the same in
# the train and test sets, so an imbalanced dataset cannot leave the test set
# with a skewed share of either class.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

## 3. Train and Evaluate Models

In [None]:
# Fit the detector on the training split.
detector.train_model(X_train, y_train)

# Run detection on the held-out split. `results` is read below through its
# 'threats_detected' and 'threat_probabilities_rf' entries.
results = detector.detect_threats(X_test)

# Evaluate performance from the true labels and both detector outputs; the
# final argument is the reports directory path, passed as a string.
reports_dir = str(project_root / 'reports')
metrics = evaluator.evaluate_model_performance(
    y_test,
    results['threats_detected'],
    results['threat_probabilities_rf'],
    reports_dir,
)

# Print every metric rounded to three decimal places.
print("\nModel Performance Metrics:")
for metric_name, metric_value in metrics.items():
    print(f"{metric_name}: {metric_value:.3f}")

## 4. Detailed Analysis

In [None]:
# ROC curve computed from the 'threat_probabilities_rf' scores, and the area
# under that curve.
fpr, tpr, _ = roc_curve(y_test, results['threat_probabilities_rf'])
roc_auc = auc(fpr, tpr)

fig, ax = plt.subplots(figsize=(10, 8))
ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC = {roc_auc:.2f})')
# Dashed diagonal from (0, 0) to (1, 1) as a visual reference line.
ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])  # small headroom above 1.0 so the curve is not clipped
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic (ROC) Curve')
ax.legend(loc="lower right")
plt.show()

## 5. Feature Importance Analysis

In [None]:
# Read the per-feature importances from the estimator stored at
# detector.rf_model.best_estimator_.
feature_importance = detector.rf_model.best_estimator_.feature_importances_
# NOTE(review): assumes X.columns matches the model's input features in
# number and order - confirm against ThreatDetector.train_model.
feature_names = X.columns

# Pair each feature with its importance and sort ascending, so the most
# important feature is drawn as the top bar of the horizontal chart.
importance_df = pd.DataFrame(
    {'feature': feature_names, 'importance': feature_importance}
).sort_values('importance', ascending=True)

fig, ax = plt.subplots(figsize=(12, 6))
ax.barh(importance_df['feature'], importance_df['importance'])
ax.set_title('Feature Importance')
ax.set_xlabel('Importance')
fig.tight_layout()
plt.show()

## 6. Error Analysis

In [None]:
# Coerce predictions and labels to plain NumPy arrays once, so every
# comparison below is element-by-element in row order.
predictions = np.asarray(results['threats_detected'])
true_labels = np.asarray(y_test)

# Compute confusion matrix
cm = confusion_matrix(true_labels, predictions)

# Plot confusion matrix
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
ax.set_title('Confusion Matrix')
plt.show()

# Analyze misclassified samples.
# The mask is a NumPy boolean array, so X_test rows are selected by position.
# If the predictions were a pandas Series, a mask built directly from them
# would be aligned on index labels, which do not match X_test's index after
# the train/test split reorders the rows.
misclassified_mask = predictions != true_labels
misclassified = X_test[misclassified_mask]
print("\nMisclassified Samples Analysis:")
print(misclassified.describe())