# SDN ML Traffic Management - Model Evaluation

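This notebook loads the trained traffic-classifier and congestion-predictor artifacts, summarizes their stored metrics and feature importances, generates a Markdown report, and runs a quick sanity check on sample inputs.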

In [None]:
!pip install -q pandas numpy scikit-learn matplotlib seaborn joblib

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
import joblib
from google.colab import files

plt.style.use('seaborn-v0_8-whitegrid')

## 1. Load Models

In [None]:
# Load each model bundle, prompting for an upload when the file is not present
def _load_artifacts(filename):
    """Load a joblib artifact from the working directory, uploading it if absent.

    Args:
        filename: Name of the artifact file expected in the current directory.

    Returns:
        The object stored in ``filename``.

    Raises:
        FileNotFoundError: If the file is missing and the upload dialog
            finishes without providing a file named ``filename``.
    """
    # NOTE(security): joblib.load unpickles arbitrary objects and can run code
    # while doing so; only load artifacts that come from a trusted training run.
    try:
        artifacts = joblib.load(filename)
    except FileNotFoundError:
        print(f"Upload {filename}:")
        uploaded = files.upload()
        # The upload dialog accepts any file name, so confirm the expected
        # artifact actually arrived before trying to load it again.
        if filename not in (uploaded or {}):
            raise FileNotFoundError(
                f"Expected an upload named {filename!r}, "
                f"got {sorted(uploaded or {})}"
            ) from None
        return joblib.load(filename)
    print(f"Loaded {filename}")
    return artifacts


classifier_artifacts = _load_artifacts('classifier.pkl')
predictor_artifacts = _load_artifacts('predictor.pkl')

In [None]:
# Unpack the traffic-classifier bundle; these three entries are required
traffic_classifier, label_encoder, classifier_features = (
    classifier_artifacts[key]
    for key in ('classifier', 'label_encoder', 'feature_columns')
)
# Accuracy is optional in the bundle, so fall back to a placeholder
classifier_accuracy = classifier_artifacts.get('accuracy', 'N/A')

# Unpack the congestion-predictor bundle the same way
congestion_regressor, congestion_classifier, predictor_features = (
    predictor_artifacts[key]
    for key in ('regressor', 'classifier', 'feature_columns')
)
# Metrics are optional as well; an empty dict means nothing was recorded
predictor_metrics = predictor_artifacts.get('metrics', {})

print("Models loaded successfully!")

## 2. Traffic Classifier Summary

In [None]:
# Banner followed by the headline facts about the traffic classifier
banner = "=" * 60
print(banner, "TRAFFIC CLASSIFIER SUMMARY", banner, sep="\n")
print(
    f"\nModel: {type(traffic_classifier).__name__}",
    f"Classes: {list(label_encoder.classes_)}",
    f"Features: {len(classifier_features)}",
    f"Test Accuracy: {classifier_accuracy}",
    sep="\n",
)

# Pair every feature name with its importance, most important first;
# `importance` is reused by the plotting and report cells further down.
print("\nFeature Importance:")
importance = pd.DataFrame(
    {
        'Feature': classifier_features,
        'Importance': traffic_classifier.feature_importances_,
    }
)
importance = importance.sort_values(by='Importance', ascending=False)
print(importance.to_string(index=False))

In [None]:
# Visualize feature importance
fig, ax = plt.subplots(figsize=(10, 6))
importance_sorted = importance.sort_values('Importance', ascending=True)
ax.barh(importance_sorted['Feature'], importance_sorted['Importance'], color='steelblue')
ax.set_xlabel('Importance')
ax.set_title('Traffic Classifier - Feature Importance')
plt.tight_layout()
plt.show()

## 3. Congestion Predictor Summary

In [None]:
# Banner followed by the headline facts about the congestion predictor
print("="*60)
print("CONGESTION PREDICTOR SUMMARY")
print("="*60)
print(f"\nRegressor: {type(congestion_regressor).__name__}")
print(f"Classifier: {type(congestion_classifier).__name__}")
print(f"Features: {len(predictor_features)}")

print("\nMetrics:")
if not predictor_metrics:
    # The artifact may have been saved without a metrics entry
    print("  (none recorded)")
for metric, value in predictor_metrics.items():
    # Only real numbers accept the fixed-point format; any other value stored
    # in the metrics dict is printed as-is instead of raising.
    if isinstance(value, (int, float, np.integer, np.floating)):
        print(f"  {metric}: {value:.4f}")
    else:
        print(f"  {metric}: {value}")

# Pair every feature name with its importance, most important first;
# `pred_importance` is reused by the plotting and report cells further down.
print("\nFeature Importance (Regressor):")
pred_importance = pd.DataFrame({
    'Feature': predictor_features,
    'Importance': congestion_regressor.feature_importances_
}).sort_values('Importance', ascending=False)
print(pred_importance.to_string(index=False))

In [None]:
# Horizontal bar chart of regressor feature importance.
# barh draws the first row at the bottom, so sorting ascending places the
# most important feature at the top of the chart.
pred_importance_sorted = pred_importance.sort_values(by='Importance', ascending=True)

fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(
    pred_importance_sorted['Feature'],
    pred_importance_sorted['Importance'],
    color='coral',
)
ax.set(xlabel='Importance', title='Congestion Predictor - Feature Importance')
fig.tight_layout()
plt.show()

## 4. Model Summary Visualization

In [None]:
# Create summary diagram: priority classes (left) and predictor scores (right)
fig, axes = plt.subplots(1, 2, figsize=(14, 6))
ax1, ax2 = axes

# Traffic Classifier
classes = list(label_encoder.classes_)
colors = ['#ff6b6b', '#4ecdc4', '#45b7d1', '#96ceb4']
# Human-readable meaning of each priority label; unknown labels get no caption
descriptions = {
    'P0': 'Bulk/Background',
    'P1': 'Web/Office',
    'P2': 'Voice/Video',
    'P3': 'Banking/Payment'
}
# One unit-length bar per class. Sizing the widths from len(classes) keeps the
# call valid when the encoder does not hold exactly four classes.
ax1.barh(classes, [1] * len(classes), color=colors[:len(classes)])
ax1.set_xlabel('Priority Level')
ax1.set_title('Traffic Priority Classes')
ax1.set_xlim(0, 1.2)
for i, cls in enumerate(classes):
    ax1.text(1.05, i, descriptions.get(cls, ''), va='center')

# Congestion Predictor
metrics_plot = {
    'R2 Score': predictor_metrics.get('regression_r2', 0),
    'Classification\nAccuracy': predictor_metrics.get('classification_accuracy', 0),
}
metric_labels = list(metrics_plot.keys())
metric_scores = list(metrics_plot.values())
ax2.bar(metric_labels, metric_scores, color=['steelblue', 'coral'])
ax2.set_ylabel('Score')
ax2.set_title('Congestion Predictor Performance')
# Leave headroom above 1.0 so value labels stay inside the axes, and extend
# below zero when needed because an R2 score can be negative.
ax2.set_ylim(min(0, *metric_scores), 1.1)
for i, v in enumerate(metric_scores):
    # Anchor the label just above the bar top (or above zero for negative bars)
    ax2.text(i, max(v, 0) + 0.02, f'{v:.2%}', ha='center')

plt.tight_layout()
plt.show()

## 5. Generate Report

In [None]:
# Hyperparameters are read defensively: an estimator that does not expose
# n_estimators / max_depth yields 'N/A' instead of raising AttributeError.
n_estimators = getattr(traffic_classifier, 'n_estimators', 'N/A')
max_depth = getattr(traffic_classifier, 'max_depth', 'N/A')

# Plain-text tables of the five most important features per model. They are
# wrapped in code fences below so Markdown renderers keep the column alignment.
top_classifier_features = importance.head(5).to_string(index=False)
top_predictor_features = pred_importance.head(5).to_string(index=False)

# Markdown report combining model facts, stored metrics, and deployment notes
report = f"""
# SDN ML Traffic Management - Model Report
Generated: {pd.Timestamp.now()}

## Traffic Classifier

- **Model**: {type(traffic_classifier).__name__}
- **Number of estimators**: {n_estimators}
- **Max depth**: {max_depth}
- **Test accuracy**: {classifier_accuracy}

### Classes
- P3 (Banking/Payment): Highest priority - lowest delay, reserved bandwidth
- P2 (Voice/Video): Low jitter, low delay
- P1 (Web/Office): Best effort
- P0 (Bulk/Background): Lowest priority - throttle under congestion

### Top Features
```
{top_classifier_features}
```

## Congestion Predictor

- **Regressor**: {type(congestion_regressor).__name__}
- **Classifier**: {type(congestion_classifier).__name__}
- **R2 Score**: {predictor_metrics.get('regression_r2', 'N/A')}
- **Classification Accuracy**: {predictor_metrics.get('classification_accuracy', 'N/A')}

### Top Features
```
{top_predictor_features}
```

## Deployment Instructions

1. Place `classifier.pkl` in `ml/models/` directory
2. Place `predictor.pkl` in `ml/models/` directory  
3. Restart the orchestrator container
4. The orchestrator will automatically use trained models

## Next Steps

1. Collect more training data with diverse traffic patterns
2. Add Scapy-based packet features for better classification
3. Implement online learning for continuous improvement
"""

print(report)

In [None]:
# Save report
# Write as UTF-8 explicitly so the file content does not depend on the
# platform's default text encoding.
with open('model_report.md', 'w', encoding='utf-8') as report_file:
    report_file.write(report)

# Hand the saved file to the browser through Colab's download helper
files.download('model_report.md')
print("Report saved and downloaded!")

## 6. Quick Test

In [None]:
# Quick sanity check
print("Quick Model Test:")
print("-" * 40)


def _sample_frame(values, feature_columns):
    """Build a one-row frame holding exactly ``feature_columns``, in order.

    Args:
        values: Mapping of feature name to the sample's value.
        feature_columns: Column names the model is indexed with.

    Returns:
        A single-row DataFrame whose columns are ``feature_columns``. Features
        absent from ``values`` are filled with 0 and reported on stdout; keys
        of ``values`` that are not model features are dropped.
    """
    missing = [col for col in feature_columns if col not in values]
    if missing:
        print(f"  Warning: sample has no value for {missing}; using 0")
    return pd.DataFrame([values]).reindex(columns=list(feature_columns), fill_value=0)


# Test traffic classifier
test_flow = _sample_frame({
    'packet_count': 50,
    'byte_count': 25000,
    'duration_sec': 5,
    'bytes_per_packet': 500,
    'packets_per_sec': 10,
    'bytes_per_sec': 5000,
    'is_udp': 0,
    'is_https': 1,
    'is_voice_port': 0,
}, classifier_features)

# The frame always carries the full feature set, so the classifier never
# receives a partial set of columns.
pred = traffic_classifier.predict(test_flow)[0]
print(f"Sample flow (HTTPS, short): Predicted = {label_encoder.classes_[pred]}")

# Test congestion predictor
test_util = _sample_frame({
    'util_lag_1': 0.65,
    'util_lag_2': 0.60,
    'util_lag_3': 0.55,
    'util_lag_4': 0.50,
    'util_lag_5': 0.45,
    'util_lag_6': 0.40,
    'util_rolling_mean': 0.525,
    'util_rolling_std': 0.09,
    'util_rolling_max': 0.65,
    'util_trend': 0.05,
    'hour_of_day': 9,
    'minute_of_hour': 30,
    'is_weekday': 1,
}, predictor_features)

pred_util = congestion_regressor.predict(test_util)[0]
pred_congested = congestion_classifier.predict(test_util)[0]
print(f"Sample link (rising, 9:30AM): Predicted util = {pred_util:.1%}, Congested = {bool(pred_congested)}")

## Complete!

Your models are ready for deployment. Follow the instructions in the report to deploy them to your SDN environment.