# P22 IDS - Quick Start Notebook

This notebook demonstrates basic usage of the Encrypted Traffic IDS.

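## What You'll Learn:
1. Initialize the IDS system
2. Process CSV files
3. Run predictions with LSTM and CNN
4. Compare and visualize model results
5. View output files and generate reports

PCAP processing is not covered here; it is explored in the next notebook.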

## Setup and Imports

In [None]:
# Add parent directory to path so the project modules imported below can be found.
# NOTE(review): this uses the kernel's current working directory, so it presumably
# expects the notebook to be run from its own folder (one level below the
# project root) — confirm.
import sys
from pathlib import Path
sys.path.insert(0, str(Path.cwd().parent))

# Import required modules
# Third-party / stdlib helpers used throughout the notebook
import numpy as np
import pandas as pd
import json
# Project-local modules (resolved through the sys.path entry added above)
from orchestrator import ServiceOrchestrator
from services.dataIngestionService import DataIngestionService
from services.lstmModelService import LSTMModelService
from services.cnnModelService import CNNModelService
from services.outputManagementService import OutputManagementService

# Reaching this line means every import above succeeded
print("✓ Imports successful!")

## 1. Initialize the IDS System

In [None]:
# Build the orchestrator from the example configuration file
orchestrator = ServiceOrchestrator('../config.example.yaml')

# Start all services and report whether start-up succeeded
services_ready = orchestrator.initialize()
print(
    "✓ All services initialized successfully!"
    if services_ready
    else "✗ Failed to initialize services"
)

In [None]:
# Ask the orchestrator for its current status snapshot
status = orchestrator.getSystemStatus()

print("\nSystem Status:")
print(f"Initialized: {status['initialized']}")

# One line per service, showing the 'status' entry of its info mapping
print("\nService Status:")
per_service = status['services']
for name in per_service:
    state = per_service[name].get('status')
    print(f"  {name}: {state}")

## 2. Process CSV File

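The first cell below creates a dummy CSV file for testing. To use your own data, skip that cell and set `csv_file` in the following cell to your actual CSV file path (replacing `'../sample_data.csv'`).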

In [None]:
# Example: Create dummy CSV data for testing
# Skip this if you have real data

# Use a seeded generator so that "Restart Kernel -> Run All" always writes the
# same dummy file, making every downstream result reproducible.
SEED = 42
N_SAMPLES = 100
rng = np.random.default_rng(SEED)

# Five feature columns (feature1..feature5) of uniform values in [0, 1),
# drawn in column order from the seeded generator.
dummy_columns = {f'feature{i}': rng.random(N_SAMPLES) for i in range(1, 6)}
# Binary label column: the upper bound (2) is exclusive, so values are 0 or 1.
dummy_columns['label'] = rng.integers(0, 2, N_SAMPLES)

dummy_data = pd.DataFrame(dummy_columns)

# Written one level up, next to the project files; index=False keeps the
# DataFrame index out of the CSV so only the six columns are stored.
dummy_data.to_csv('../sample_data.csv', index=False)
print("✓ Created sample CSV file: sample_data.csv")

In [None]:
# Hand the CSV file to the orchestrator's ingestion entry point
csv_file = '../sample_data.csv'
print(f"Processing CSV file: {csv_file}")

csv_result = orchestrator.processDataFile(csv_file, fileType='csv')

# (label, accessor) pairs; each accessor is evaluated only when its line is
# printed, so lookups happen in the same order as the printed summary.
summary_fields = (
    ("File Type", lambda result: result['fileType']),
    ("Samples", lambda result: result['sampleCount']),
    ("Features", lambda result: len(result['featureNames'])),
    ("Feature Shape", lambda result: result['features'].shape),
)

print("\n✓ CSV Processing Complete!")
for field_label, read_field in summary_fields:
    print(f"  {field_label}: {read_field(csv_result)}")

## 3. Run Predictions on CSV Data

In [None]:
# Run both models on the ingested CSV data and request an aggregated result
print("Running LSTM and CNN models...")

inference_options = {'modelType': 'both', 'aggregate': True}
prediction_result = orchestrator.runInference(data=csv_result, **inference_options)

print("\n✓ Prediction Complete!")

In [None]:
# Show the aggregated prediction result and how the predictions split by class
from collections import Counter

print("=== Final Prediction Results ===")
print(f"Model Type: {prediction_result['modelType']}")
print(f"Aggregation Method: {prediction_result.get('aggregationMethod', 'N/A')}")

final_predictions = prediction_result['predictions']
# Only the first ten predictions are shown to keep the output short
print(f"\nPredictions: {final_predictions[:10]}...")
print(f"Confidence: {prediction_result['confidence']:.4f}")

# Tally predictions per class and report each class's share of the total
pred_counts = Counter(final_predictions)
total_predictions = len(final_predictions)
print("\nPrediction Distribution:")
for label in sorted(pred_counts):
    hits = pred_counts[label]
    share = hits / total_predictions * 100
    print(f"  Class {label}: {hits} samples ({share:.1f}%)")

## 4. Compare LSTM vs CNN Results

In [None]:
# Run each model on its own (no aggregation) so their outputs can be compared
lstm_result = orchestrator.runInference(data=csv_result, modelType='lstm', aggregate=False)
cnn_result = orchestrator.runInference(data=csv_result, modelType='cnn', aggregate=False)

print("=== Model Comparison ===")
for model_label, model_result in (("LSTM", lstm_result), ("CNN", cnn_result)):
    print(f"\n{model_label} Model:")
    # First ten predictions only, to keep the output short
    print(f"  Predictions: {model_result['predictions'][:10]}...")
    print(f"  Avg Confidence: {np.mean(model_result['confidences']):.4f}")

# Count positions where both models predicted the same class
lstm_predictions = lstm_result['predictions']
cnn_predictions = cnn_result['predictions']
agreement = 0
for lstm_pred, cnn_pred in zip(lstm_predictions, cnn_predictions):
    if lstm_pred == cnn_pred:
        agreement += 1

total_compared = len(lstm_predictions)
print(f"\nModel Agreement: {agreement}/{total_compared} ({agreement/total_compared*100:.1f}%)")

## 5. Visualize Results

In [None]:
import matplotlib.pyplot as plt

# One histogram panel per result set: LSTM, CNN, and the ensemble
fig, axes = plt.subplots(1, 3, figsize=(15, 4))

# (panel title, result dict, bar colour), in left-to-right panel order
panels = (
    ('LSTM Predictions', lstm_result, 'blue'),
    ('CNN Predictions', cnn_result, 'green'),
    ('Ensemble Predictions', prediction_result, 'red'),
)

for ax, (panel_title, panel_result, bar_color) in zip(axes, panels):
    ax.hist(panel_result['predictions'], bins=10, alpha=0.7, color=bar_color)
    ax.set_title(panel_title)
    ax.set_xlabel('Class')
    ax.set_ylabel('Count')

plt.tight_layout()
plt.show()

In [None]:
# Confidence-score histograms for the two single-model runs, side by side
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# (axes, model name, result dict, bar colour) for each panel
confidence_panels = (
    (axes[0], 'LSTM', lstm_result, 'blue'),
    (axes[1], 'CNN', cnn_result, 'green'),
)

for ax, model_label, model_result, bar_color in confidence_panels:
    confidences = model_result['confidences']
    mean_confidence = np.mean(confidences)

    ax.hist(confidences, bins=20, alpha=0.7, color=bar_color, edgecolor='black')
    # Dashed red line marks the mean confidence; its value appears in the legend
    ax.axvline(mean_confidence, color='red', linestyle='--',
               label=f"Mean: {mean_confidence:.3f}")
    ax.set_title(f'{model_label} Confidence Distribution')
    ax.set_xlabel('Confidence')
    ax.set_ylabel('Count')
    ax.legend()

plt.tight_layout()
plt.show()

## 6. View Output Files

In [None]:
# List output files
# (Path is already imported in the setup cell at the top of the notebook.)

def newest_file(paths):
    """Return the most recently modified path in ``paths``, or ``None`` if empty.

    Glob results come back in no guaranteed order, so the last element of a
    glob listing is not necessarily the newest file. Modification time is
    used here to decide which file is actually the latest.

    Parameters:
        paths: iterable of ``pathlib.Path`` objects pointing at existing files.

    Returns:
        The path with the greatest modification time, or ``None`` when
        ``paths`` is empty.
    """
    return max(paths, key=lambda p: p.stat().st_mtime, default=None)


output_dir = Path('../outputs')

print("=== Output Files ===")
for subdir in ['lstm', 'cnn', 'final']:
    dir_path = output_dir / subdir
    # Sub-directories that have not been created yet are skipped silently
    if dir_path.exists():
        files = list(dir_path.glob('*.json'))
        print(f"\n{subdir.upper()}/: {len(files)} files")
        latest = newest_file(files)
        if latest is not None:
            print(f"  Latest: {latest.name}")

In [None]:
# Read and display the most recent final prediction file
final_dir = output_dir / 'final'
final_files = list(final_dir.glob('final_prediction_*.json'))
if final_files:
    # Glob order is not guaranteed, so pick the newest file by modification
    # time instead of taking the last element of the listing.
    latest_file = max(final_files, key=lambda p: p.stat().st_mtime)
    print(f"Reading: {latest_file.name}")

    # Explicit UTF-8 so decoding does not depend on the platform's default locale
    with open(latest_file, encoding='utf-8') as f:
        final_data = json.load(f)

    print("\n=== Final Prediction File ===")
    preview = json.dumps(final_data, indent=2)
    # Show at most 500 characters; add an ellipsis only if something was cut off
    print(preview[:500] + ("..." if len(preview) > 500 else ""))
else:
    # Say so explicitly rather than producing no output at all
    print(f"No final prediction files found in {final_dir}")

## 7. Generate Report

In [None]:
# Ask the output service for its report and print the per-type output counts
report = orchestrator.outputService.generateReport()

print("=== Output Report ===")
print(f"Generated At: {report['generatedAt']}")

print("\nOutput Counts:")
output_counts = report['outputCounts']
for kind in output_counts:
    print(f"  {kind.upper()}: {output_counts[kind]}")

## 8. Cleanup

In [None]:
# Stop every service managed by the orchestrator, then confirm on stdout
orchestrator.shutdown()

shutdown_notice = "✓ All services shut down successfully!"
print(shutdown_notice)

## Summary

In this notebook, you learned how to:
1. ✓ Initialize the IDS system
2. ✓ Process CSV files
3. ✓ Run predictions with LSTM and CNN models
4. ✓ Compare model outputs
5. ✓ Visualize results
6. ✓ Access output files
7. ✓ Generate reports

**Next Steps:**
- Try with your own CSV files
- Explore PCAP processing in the next notebook
- Learn about model training and hyperparameter tuning