In [None]:
# Network Traffic Congestion Analysis 🚦

This notebook demonstrates how to use the Network Traffic Congestion Predictor for data analysis and model evaluation.

## Table of Contents
1. [Setup and Data Loading](#setup)
2. [Data Exploration](#exploration)
3. [Model Training and Evaluation](#model)
4. [Interactive Predictions](#predictions)


In [None]:
## 1. Setup and Data Loading <a name="setup"></a>

First, let's import the necessary libraries and load our dataset.


In [None]:
import sys
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix

# Add project root to path
# NOTE(review): '..' is interpreted relative to the kernel's current working
# directory, so this presumably expects the notebook to be run from a folder
# one level below the project root -- confirm.
sys.path.append('..')

# Import project modules (presumably located through the path entry added above)
from core.trainer import TrafficModelTrainer
from core.predictor import TrafficPredictor


In [None]:
# Read the synthetic network dataset into a DataFrame and preview its first five rows
df = pd.read_csv(filepath_or_buffer='../assets/datasets/synthetic_network_data.csv')
df.head(n=5)


In [None]:
## 2. Data Exploration <a name="exploration"></a>

Let's analyze our network traffic data to understand patterns and relationships.


In [None]:
# Report the frame's (rows, columns) first, then pandas' default summary statistics
rows_and_columns = df.shape
print("Dataset Shape:", rows_and_columns)
print("\nBasic Statistics:")
df.describe()


In [None]:
# Protocol distribution: one bar per protocol value, bar height = number of rows
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=df, x='protocol', ax=ax)
ax.set_title('Distribution of Network Protocols')
plt.show()


In [None]:
# Service distribution, bars ordered from the most to the least frequent service
fig, ax = plt.subplots(figsize=(12, 6))
service_order = df['service'].value_counts().index
sns.countplot(data=df, x='service', order=service_order, ax=ax)
# Tilt the x tick labels by 45 degrees
ax.tick_params(axis='x', labelrotation=45)
ax.set_title('Distribution of Network Services')
plt.show()


In [None]:
## 3. Model Training and Evaluation <a name="model"></a>

Now let's train our model and evaluate its performance.


In [None]:
# Train the model on the dataset file.
# `__file__` is not defined inside a notebook kernel (it raised NameError here),
# so the dataset is located relative to the working directory instead, using
# the same '../assets/datasets/...' location the loading cell reads from.
data_path = os.path.join('..', 'assets', 'datasets', 'synthetic_network_data.csv')
trainer = TrafficModelTrainer()
metrics = trainer.train(data_path)

# Predict every row so that y_pred lines up one-to-one with y; scikit-learn's
# classification_report raises ValueError when the two inputs differ in length,
# which is what happened when only the first row was predicted.
# NOTE(review): these rows come from the same file the model was just trained
# on, so the scores below are likely optimistic -- confirm whether `metrics`
# already holds held-out results.
# NOTE(review): assumes predict() returns 'congestion' using the same label
# values as the 'congestion' column -- confirm.
predictor = TrafficPredictor()
y = df['congestion']
feature_records = df.drop(columns='congestion').to_dict('records')
y_pred = [predictor.predict(record)['congestion'] for record in feature_records]

# Print classification report
print("Classification Report:")
print(classification_report(y, y_pred))


In [None]:
# Confusion matrix as an annotated heatmap (rows: actual, columns: predicted)
cm = confusion_matrix(y, y_pred)
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set(title='Confusion Matrix', xlabel='Predicted', ylabel='Actual')
plt.show()


In [None]:
## 4. Interactive Predictions <a name="predictions"></a>

Let's create an interactive function to make predictions on new data.


In [None]:
def predict_congestion(duration, src_bytes, dst_bytes, packet_count, hour, protocol, service):
    """Run a single feature record through a newly created TrafficPredictor.

    Every argument is placed in the record under a key equal to its own
    parameter name, in the order the parameters are declared.

    Returns:
        Whatever ``TrafficPredictor.predict`` returns for that record.
    """
    model = TrafficPredictor()

    # Assemble the record handed to the predictor
    record = dict(
        duration=duration,
        src_bytes=src_bytes,
        dst_bytes=dst_bytes,
        packet_count=packet_count,
        hour=hour,
        protocol=protocol,
        service=service,
    )

    return model.predict(record)


In [None]:
# Example prediction for one hand-written record
example_flow = dict(
    duration=10.5,
    src_bytes=5120,
    dst_bytes=2400,
    packet_count=65,
    hour=9,
    protocol='TCP',
    service='http',
)
result = predict_congestion(**example_flow)

# Show the two fields read from the returned mapping
print("Prediction Result:")
print(f"Congestion: {result['congestion']}")
print(f"Probability: {result['probability']:.2f}")
