# Multi-UE Traffic Classification - Exploratory Data Analysis

This notebook provides exploratory data analysis for network flow data used in traffic classification.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Configure plotting
# Keep this order: applying the 'default' style resets matplotlib's rcParams
# (color cycle included), so the seaborn palette has to be set afterwards.
plt.style.use('default')
sns.set_palette('husl')
# IPython magic (not plain Python): draw figures inline in the cell output.
%matplotlib inline

In [None]:
# Read the synthetic flow records into the working DataFrame `df`,
# which the later cells of this notebook operate on.
flows_csv = '../data/synthetic/all_synthetic_flows.csv'
df = pd.read_csv(flows_csv)

# Report the size of what was loaded: rows are flows, columns are features.
n_flows, n_features = df.shape
print(f"Loaded {n_flows} flows with {n_features} features")

# Last expression of the cell: preview the first rows via rich display.
df.head()

In [None]:
# Share of flows per application class, drawn as a pie chart.
fig, ax = plt.subplots(figsize=(10, 6))

# Count how many flows carry each 'Application_Class' label.
class_counts = df['Application_Class'].value_counts()

# One wedge per class, annotated with its percentage to one decimal place.
ax.pie(
    class_counts.values,
    labels=class_counts.index,
    autopct='%1.1f%%',
)
ax.set_title('Traffic Class Distribution')
plt.show()

In [None]:
# Pairwise correlation heatmap over a subset of the numeric features.
# Only the first 15 numeric columns are used so the annotated grid stays legible.
numeric_cols = df.select_dtypes(include=[np.number]).columns[:15]

fig, ax = plt.subplots(figsize=(12, 8))
correlation_matrix = df[numeric_cols].corr()

# Diverging colormap centred on zero; each cell is annotated with its coefficient.
sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap='coolwarm',
    center=0,
    ax=ax,
)
ax.set_title('Feature Correlation Matrix')
fig.tight_layout()
plt.show()