In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Load generated data
# The file names end in a YYYYMMDD_HHMMSS stamp (presumably the generation
# time -- confirm against the generator), so the exact name is not known in
# advance. Resolve the newest dataset on disk instead of relying on the
# literal placeholder name, which would otherwise raise FileNotFoundError.
synthetic_dir = Path('data/synthetic')
dataset_files = sorted(synthetic_dir.glob('banking_ddos_dataset_*.csv'))
if not dataset_files:
    raise FileNotFoundError(
        f"No 'banking_ddos_dataset_*.csv' file found in '{synthetic_dir}'; "
        "generate the synthetic dataset before running this cell."
    )

# A YYYYMMDD_HHMMSS stamp sorts chronologically as plain text, so the last
# entry of the sorted list is the most recent run.
dataset_path = dataset_files[-1]
# Assumes the labels file carries the same stamp as its dataset -- TODO confirm.
labels_path = dataset_path.with_name(
    dataset_path.name.replace('banking_ddos_dataset_', 'banking_ddos_labels_', 1)
).with_suffix('.npy')

data = pd.read_csv(dataset_path)
labels = np.load(labels_path)
print("Dataset File:", dataset_path)

# Later cells use the positions where labels == 1 to index rows of `data`,
# so a length mismatch would silently mark the wrong samples as attacks.
if len(labels) != len(data):
    print("Warning: labels length", len(labels),
          "does not match dataset rows", len(data))

# Basic statistics
print("Dataset Shape:", data.shape)
print("Attack Percentage:", (labels.sum() / len(labels)) * 100, "%")

# Visualize patterns
# One panel per metric: the full series as a line, with the samples labelled
# as attacks (labels == 1) overlaid as red dots.
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Row positions of the attack samples; computed once and reused by every
# panel instead of re-running np.where for each plot.
attack_idx = np.where(labels == 1)[0]

# (column in `data`, panel title), listed in row-major order so they line up
# with axes.flat: [0, 0], [0, 1], [1, 0], [1, 1].
panels = [
    ('api_request_rate', 'API Request Rate (Red = Attacks)'),
    ('api_error_rate', 'API Error Rate (Red = Attacks)'),
    ('api_response_time_p95', 'API Response Time P95 (Red = Attacks)'),
    ('cpu_usage_percent', 'CPU Usage % (Red = Attacks)'),
]

for ax, (column, panel_title) in zip(axes.flat, panels):
    series = data[column]
    ax.plot(series, alpha=0.7)
    # Select the column first, then the rows: avoids copying every column
    # of the frame just to read one of them.
    ax.scatter(attack_idx, series.iloc[attack_idx],
               color='red', alpha=0.5, s=1)
    ax.set_title(panel_title)

plt.tight_layout()
plt.show()

# Feature correlation analysis
# Pairwise correlation between the numeric columns of `data`, drawn as a
# heatmap on a diverging palette centred at zero.
numeric_features = data.select_dtypes(include=[np.number])
correlation_matrix = numeric_features.corr()

heatmap_fig, heatmap_ax = plt.subplots(figsize=(12, 8))
sns.heatmap(
    correlation_matrix,
    annot=False,
    cmap='coolwarm',
    center=0,
    ax=heatmap_ax,
)
heatmap_ax.set_title('Feature Correlation Matrix')
plt.show()