# CIC-IDS2017 EDA

# Phase 0: Setup & Configuration

In [None]:
# =============================================================================
# STEP 0.1: IMPORTS
# =============================================================================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import glob
import json
import gc
import re
from pathlib import Path
from datetime import datetime

# Pandas display: show every column, cap printed rows at 100, let the width
# auto-size, and render floats with four decimals.
for _option, _value in {
    'display.max_columns': None,
    'display.max_rows': 100,
    'display.width': None,
    'display.float_format': '{:.4f}'.format,
}.items():
    pd.set_option(_option, _value)

# Matplotlib / seaborn defaults shared by every figure in this notebook.
for _rc_key, _rc_value in {
    'figure.figsize': (12, 6),
    'font.size': 10,
}.items():
    plt.rcParams[_rc_key] = _rc_value
sns.set_palette('husl')

# Record when the run started and which library versions produced it.
_started_at = datetime.now()
print(f"Analysis started: {_started_at:%Y-%m-%d %H:%M:%S}")
print(f"Pandas version: {pd.__version__}")
print(f"NumPy version: {np.__version__}")

In [None]:
# =============================================================================
# STEP 0.2: FILE PATHS - UPDATE THIS
# =============================================================================

DATA_DIR = "data/"  # <-- UPDATE THIS PATH

# Every artifact produced by the EDA is written below this directory.
OUTPUT_DIR = "eda_outputs/"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Collect the CSV files that sit directly inside DATA_DIR, in sorted order.
_csv_pattern = os.path.join(DATA_DIR, "*.csv")
FILE_PATHS = sorted(glob.glob(_csv_pattern))

# Fail early: nothing downstream can run without input files.
if not FILE_PATHS:
    raise FileNotFoundError(f"No CSV files found in {DATA_DIR}. Please update DATA_DIR.")

print(f"Found {len(FILE_PATHS)} CSV files:")
print("\n".join(f"  - {os.path.basename(path)}" for path in FILE_PATHS))

In [None]:
# =============================================================================
# STEP 0.3: LABEL NORMALIZATION FUNCTION
# =============================================================================
# CIC-IDS2017 has inconsistent label encoding across different versions.
# This function normalizes all variants to a canonical form.

# Dash look-alikes that appear in place of '-' in some label spellings:
# en-dash, em-dash, the Unicode replacement character and the \x96 control
# code. Compiled once because normalize_label runs once per row.
_DASH_VARIANTS_RE = re.compile(r'[–—�\x96\u2013\u2014]')

# Lower-cased label -> canonical spelling. Built once at definition time
# instead of on every call.
_CANONICAL_LABELS = {
    'benign': 'BENIGN',
    'bot': 'Bot',
    'ddos': 'DDoS',
    'dos hulk': 'DoS Hulk',
    'dos goldeneye': 'DoS GoldenEye',
    'dos slowloris': 'DoS Slowloris',
    'dos slowhttptest': 'DoS Slowhttptest',
    'ftp-patator': 'FTP-Patator',
    'ssh-patator': 'SSH-Patator',
    'heartbleed': 'Heartbleed',
    'infiltration': 'Infiltration',
    'portscan': 'PortScan',
    'web attack - brute force': 'Web Attack - Brute Force',
    'web attack - xss': 'Web Attack - XSS',
    'web attack - sql injection': 'Web Attack - Sql Injection',
}


def normalize_label(label):
    """
    Normalize an attack label to its canonical spelling.

    Args:
        label: Raw label value; anything ``pd.isna`` treats as missing is
            accepted, and other values are converted with ``str``.

    Returns:
        'Unknown' for a missing value; the canonical label when the
        stripped, dash-normalized, lower-cased text is a known variant;
        otherwise the stripped, dash-normalized text unchanged.
    """
    if pd.isna(label):
        return 'Unknown'

    # Trim surrounding whitespace, then map every dash variant to '-'.
    cleaned = _DASH_VARIANTS_RE.sub('-', str(label).strip())

    # Case-insensitive lookup; unknown labels pass through as cleaned.
    return _CANONICAL_LABELS.get(cleaned.lower(), cleaned)


# Smoke-check the normalizer on representative raw label spellings.
test_labels = [
    # Case variants
    'BENIGN',
    'benign',
    'DoS slowloris',
    'DoS Slowloris',
    # Dash variants
    'Web Attack � Brute Force',
    'Web Attack – Brute Force',
    'Web Attack - Brute Force',
    'Web Attack � XSS',
    'Web Attack – XSS',
    # Case variants again
    'DoS Slowhttptest',
    'DoS slowhttptest',
]

print("Label normalization test:")
for raw_label in test_labels:
    normalized = normalize_label(raw_label)
    print(f"  '{raw_label}' → '{normalized}'")

In [None]:
# =============================================================================
# STEP 0.4: ATTACK FAMILY MAPPING (Using Normalized Labels)
# =============================================================================

# Family -> member labels (normalized spellings, ASCII hyphen in the web
# attack names). Insertion order here fixes the iteration order of FAMILY_MAP.
_FAMILY_MEMBERS = {
    'BENIGN': ['BENIGN'],
    'DoS': ['DoS Hulk', 'DoS GoldenEye', 'DoS Slowloris', 'DoS Slowhttptest', 'DDoS'],
    'BruteForce': ['FTP-Patator', 'SSH-Patator'],
    'WebAttack': [
        'Web Attack - Brute Force',
        'Web Attack - XSS',
        'Web Attack - Sql Injection',
    ],
    'Probe': ['PortScan'],
    'Botnet': ['Bot'],
    'Infiltration': ['Infiltration'],
    'Heartbleed': ['Heartbleed'],
}

# Label -> family lookup used by every later cell.
FAMILY_MAP = {
    member: family_name
    for family_name, members in _FAMILY_MEMBERS.items()
    for member in members
}

print("Attack Family Mapping:")
for attack_label, attack_family in FAMILY_MAP.items():
    if attack_label == 'BENIGN':
        continue
    print(f"  {attack_label} → {attack_family}")

n_attack_types = sum(1 for key in FAMILY_MAP if key != 'BENIGN')
attack_families = sorted({fam for fam in FAMILY_MAP.values() if fam != 'BENIGN'})
print(f"\nTotal attack types: {n_attack_types}")
print(f"Attack families: {attack_families}")

In [None]:
# =============================================================================
# STEP 0.5: GLOBAL CONFIGURATION
# =============================================================================

# Single source of truth for the tunable parameters used in this notebook;
# it is also dumped to CONFIG.json in the final cell.
CONFIG = {
    # Sampling
    'sample_fraction': 0.10,        # fraction drawn from each sufficiently large class
    'min_samples_keep_all': 100,    # classes smaller than this are kept in full
    'random_seed': 42,

    # Thresholds
    'high_correlation_threshold': 0.95,
    'near_zero_variance_threshold': 0.99,
    'imbalance_warning_threshold': 0.95,

    # Column classifications
    'identifier_columns': ['Flow ID', 'Source IP', 'Destination IP', 'Timestamp'],
    'target_column': 'Label',
    'categorical_columns': ['Protocol', 'Destination Port'],

    # Paths
    'data_dir': DATA_DIR,
    'output_dir': OUTPUT_DIR,
}

print("Configuration:")
# The '%' format type scales and rounds in one step, so fractions such as
# 0.07 or 0.29 no longer print binary floating-point noise.
print(f"  - Sample fraction: {CONFIG['sample_fraction']:.1%}")
print(f"  - Random seed: {CONFIG['random_seed']}")
In [None]:
# =============================================================================
# STEP 0.6: HELPER FUNCTIONS
# =============================================================================

def get_file_category(filepath):
    """Return the file name without its directory and final extension.

    Example: 'data/Botnet.csv' -> 'Botnet'.
    """
    _directory, file_name = os.path.split(filepath)
    stem, _extension = os.path.splitext(file_name)
    return stem


def estimate_memory_mb(df):
    """Return the deep memory footprint of ``df`` in mebibytes.

    ``deep=True`` makes pandas include the contents of object columns.
    """
    bytes_per_mb = 1024 * 1024
    total_bytes = df.memory_usage(deep=True).sum()
    return total_bytes / bytes_per_mb


def count_infinities(df):
    """Count +/-inf values in the numeric columns of ``df``.

    Args:
        df: DataFrame to scan; non-numeric columns are ignored.

    Returns:
        Tuple ``(total, per_column)``. ``per_column`` maps each column that
        contains at least one infinite value to its count. ``total`` is the
        sum of those counts. Both use plain ``int`` values, including when
        nothing is found.
    """
    inf_counts = {}
    for col in df.select_dtypes(include=[np.number]).columns:
        # Cast right away so no numpy scalar leaks into the result.
        n_inf = int(np.isinf(df[col]).sum())
        if n_inf > 0:
            inf_counts[col] = n_inf
    return sum(inf_counts.values()), inf_counts


def count_negatives(df):
    """Count values below zero in the numeric columns of ``df``.

    Args:
        df: DataFrame to scan; non-numeric columns are ignored.

    Returns:
        Tuple ``(total, per_column)``. ``per_column`` maps each column that
        contains at least one negative value to its count. ``total`` is the
        sum of those counts. Both use plain ``int`` values, including when
        nothing is found.
    """
    neg_counts = {}
    for col in df.select_dtypes(include=[np.number]).columns:
        # Cast right away so no numpy scalar leaks into the result.
        n_neg = int((df[col] < 0).sum())
        if n_neg > 0:
            neg_counts[col] = n_neg
    return sum(neg_counts.values()), neg_counts


def clean_column_names(df):
    """Remove leading/trailing whitespace from every column name.

    The frame is modified in place and also returned so calls can be chained.
    """
    stripped_names = df.columns.str.strip()
    df.columns = stripped_names
    return df


def normalize_labels_in_df(df, label_col='Label'):
    """Run ``normalize_label`` over ``df[label_col]`` in place.

    When the frame has no ``label_col`` column it is returned untouched.
    The same frame object is returned in both cases.
    """
    if label_col not in df.columns:
        return df

    df[label_col] = df[label_col].apply(normalize_label)
    return df


print("Helper functions defined.")

---
# Phase 1: Per-File Health Check

In [None]:
# =============================================================================
# STEP 1.1: SCHEMA CONSISTENCY CHECK
# =============================================================================

print("=" * 60)
print("STEP 1.1: SCHEMA CONSISTENCY CHECK")
print("=" * 60)

# filename -> {'category', 'columns' (set of stripped names), 'num_columns'}
schemas = {}

for filepath in FILE_PATHS:
    filename = os.path.basename(filepath)
    category = get_file_category(filepath)

    # nrows=0 parses the header only, so no data rows are loaded.
    header_only = clean_column_names(pd.read_csv(filepath, nrows=0))
    columns = set(header_only.columns)

    schemas[filename] = {
        'category': category,
        'columns': columns,
        'num_columns': len(columns),
    }

    print(f"{filename} ({category}): {len(columns)} columns")

# The first file (in sorted order) serves as the reference schema.
reference_file = list(schemas)[0]
reference_columns = schemas[reference_file]['columns']

print("\n--- Schema Comparison ---")
all_match = True

for filename, info in schemas.items():
    file_columns = info['columns']
    if file_columns == reference_columns:
        continue

    all_match = False
    missing = reference_columns - file_columns
    extra = file_columns - reference_columns
    if missing:
        print(f"  {filename}: MISSING columns: {missing}")
    if extra:
        print(f"  {filename}: EXTRA columns: {extra}")

if all_match:
    print("  ✓ All files have identical schemas.")

# Columns present in every file.
common_columns = set.intersection(*(info['columns'] for info in schemas.values()))
print(f"\nCommon columns: {len(common_columns)}")

In [None]:
# =============================================================================
# STEPS 1.2-1.8: PER-FILE HEALTH ANALYSIS
# =============================================================================

print("=" * 60)
print("STEPS 1.2-1.8: PER-FILE HEALTH ANALYSIS")
print("=" * 60)


def _pct(part, whole):
    """Return ``part`` as a percentage of ``whole``; 0.0 when ``whole`` is zero.

    A zero-row file would otherwise raise ZeroDivisionError inside the
    ``try`` below and be dropped from the health report.
    """
    return part / whole * 100 if whole else 0.0


file_health_records = []  # one summary dict per successfully analysed file
all_label_counts = {}  # Aggregate labels across ALL files

for filepath in FILE_PATHS:
    filename = os.path.basename(filepath)
    category = get_file_category(filepath)

    print(f"\n{'='*60}")
    print(f"Processing: {filename} (Category: {category})")
    print(f"{'='*60}")

    try:
        df = pd.read_csv(filepath, low_memory=False)
        df = clean_column_names(df)
        df = normalize_labels_in_df(df, 'Label')  # NORMALIZE LABELS

        # -----------------------------------------------------------------
        # 1.2: Shape & Memory
        # -----------------------------------------------------------------
        n_rows, n_cols = df.shape
        memory_mb = estimate_memory_mb(df)

        print(f"\n[1.2] Shape & Memory:")
        print(f"      Rows: {n_rows:,} | Columns: {n_cols} | Memory: {memory_mb:.2f} MB")

        # -----------------------------------------------------------------
        # 1.3: Data Types
        # -----------------------------------------------------------------
        dtype_counts = df.dtypes.value_counts()
        print(f"\n[1.3] Data Types:")
        for dtype, count in dtype_counts.items():
            print(f"      {dtype}: {count}")

        # -----------------------------------------------------------------
        # 1.4: Missing Values
        # -----------------------------------------------------------------
        # One pass over the frame; the total and the per-column view are
        # both derived from the same Series.
        null_by_col = df.isnull().sum()
        null_total = null_by_col.sum()
        cols_with_nulls = null_by_col[null_by_col > 0]

        print(f"\n[1.4] Missing Values: {null_total:,} total")
        if len(cols_with_nulls) > 0:
            # Only the first three affected columns are listed.
            for col, count in cols_with_nulls.head(3).items():
                print(f"      - {col}: {count:,} ({_pct(count, n_rows):.2f}%)")

        # -----------------------------------------------------------------
        # 1.5: Infinity Values
        # -----------------------------------------------------------------
        inf_total, inf_by_col = count_infinities(df)
        print(f"\n[1.5] Infinity Values: {inf_total:,} total")
        if inf_total > 0:
            for col, count in list(inf_by_col.items())[:3]:
                print(f"      - {col}: {count:,}")

        # -----------------------------------------------------------------
        # 1.6: Negative Values
        # -----------------------------------------------------------------
        neg_total, neg_by_col = count_negatives(df)
        print(f"\n[1.6] Negative Values: {neg_total:,} total")
        if neg_total > 0:
            for col, count in list(neg_by_col.items())[:3]:
                print(f"      - {col}: {count:,}")

        # -----------------------------------------------------------------
        # 1.7: Duplicates
        # -----------------------------------------------------------------
        n_duplicates = df.duplicated().sum()
        print(f"\n[1.7] Duplicates: {n_duplicates:,} ({_pct(n_duplicates, n_rows):.2f}%)")

        # -----------------------------------------------------------------
        # 1.8: Label Distribution (with aggregation)
        # -----------------------------------------------------------------
        if 'Label' in df.columns:
            label_counts = df['Label'].value_counts()
            n_labels = len(label_counts)

            print(f"\n[1.8] Label Distribution:")
            for label, count in label_counts.items():
                pct = _pct(count, n_rows)
                family = FAMILY_MAP.get(label, 'UNMAPPED')
                print(f"      - {label}: {count:,} ({pct:.2f}%) [Family: {family}]")

                # Aggregate into global counts
                all_label_counts[label] = all_label_counts.get(label, 0) + count

            # Check for unmapped labels
            unmapped = [l for l in label_counts.index if l not in FAMILY_MAP]
            if unmapped:
                print(f"      ⚠ UNMAPPED LABELS: {unmapped}")

            benign_pct = _pct(label_counts.get('BENIGN', 0), n_rows)
        else:
            n_labels = 0
            benign_pct = 0

        # Store record
        file_health_records.append({
            'filename': filename,
            'category': category,
            'n_rows': n_rows,
            'n_columns': n_cols,
            'memory_mb': round(memory_mb, 2),
            'null_total': null_total,
            'inf_total': inf_total,
            'neg_total': neg_total,
            'n_duplicates': n_duplicates,
            'n_labels': n_labels,
            'benign_pct': round(benign_pct, 2)
        })

        # Release the frame before the next file is loaded.
        del df
        gc.collect()

    except Exception as e:
        # Best effort: report the failure and continue with the next file.
        print(f"  ERROR: {e}")
        import traceback
        traceback.print_exc()

print(f"\n{'='*60}")
print("Per-file analysis complete.")

In [None]:
# =============================================================================
# FILE HEALTH REPORT SUMMARY
# =============================================================================

print("=" * 60)
print("FILE HEALTH REPORT SUMMARY")
print("=" * 60)

# One row per file that was analysed successfully in the previous cell.
df_health = pd.DataFrame(file_health_records)
print("\n")
print(df_health.to_string(index=False))

# Column totals across all files.
total_rows = df_health['n_rows'].sum()
total_memory_mb = df_health['memory_mb'].sum()
total_nulls = df_health['null_total'].sum()
total_infinities = df_health['inf_total'].sum()
total_negatives = df_health['neg_total'].sum()
total_duplicates = df_health['n_duplicates'].sum()

print("\n--- Aggregated Statistics ---")
print(f"Total rows: {total_rows:,}")
print(f"Total memory: {total_memory_mb:.2f} MB")
print(f"Total nulls: {total_nulls:,}")
print(f"Total infinities: {total_infinities:,}")
print(f"Total negatives: {total_negatives:,}")
print(f"Total duplicates: {total_duplicates:,}")

# Persist the per-file report next to the other EDA artifacts.
health_path = os.path.join(OUTPUT_DIR, 'file_health_report.csv')
df_health.to_csv(health_path, index=False)
print(f"\n✓ Saved to: {health_path}")

In [None]:
# =============================================================================
# GLOBAL LABEL DISTRIBUTION (Aggregated from ALL files)
# =============================================================================

print("=" * 60)
print("GLOBAL LABEL DISTRIBUTION")
print("=" * 60)

# Label -> total count over all files, most frequent first.
label_series = pd.Series(all_label_counts).sort_values(ascending=False)
total_samples = label_series.sum()

print(f"\nTotal samples across all files: {total_samples:,}")
print(f"Unique labels: {len(label_series)}")
print("\n--- Label Counts ---")

for label, count in label_series.items():
    pct = count / total_samples * 100
    family = FAMILY_MAP.get(label, 'UNMAPPED')
    if family == 'UNMAPPED':
        marker = "⚠"
    else:
        marker = "✓"
    print(f"  {marker} {label}: {count:,} ({pct:.2f}%) → {family}")

# Labels without a family assignment need to be added to FAMILY_MAP.
unmapped_labels = [lbl for lbl in label_series.index if lbl not in FAMILY_MAP]
if unmapped_labels:
    print(f"\n⚠ WARNING: {len(unmapped_labels)} unmapped labels found!")
    print(f"  You need to add these to FAMILY_MAP: {unmapped_labels}")

In [None]:
# =============================================================================
# HIERARCHICAL BALANCE ANALYSIS
# =============================================================================

print("=" * 60)
print("HIERARCHICAL BALANCE ANALYSIS")
print("=" * 60)


def _share(part, whole):
    """Return ``part`` as a percentage of ``whole``; 0 when ``whole`` is zero."""
    return part / whole * 100 if whole else 0


# Binary balance: BENIGN versus everything else.
total_benign = all_label_counts.get('BENIGN', 0)
total_attack = total_samples - total_benign

print("\n--- Stage 1: Binary Classification ---")
print(f"  BENIGN: {total_benign:,} ({_share(total_benign, total_samples):.2f}%)")
print(f"  ATTACK: {total_attack:,} ({_share(total_attack, total_samples):.2f}%)")
if total_attack > 0:
    print(f"  Imbalance ratio: {total_benign/total_attack:.2f}:1")

# Family balance (attack only)
print("\n--- Stage 2: Attack Family Classification ---")
print("  (Excluding BENIGN)")

# Family -> number of attack rows; labels missing from FAMILY_MAP are
# pooled under 'Unknown'.
family_counts = {}
for label, count in all_label_counts.items():
    if label == 'BENIGN':
        continue
    family = FAMILY_MAP.get(label, 'Unknown')
    family_counts[family] = family_counts.get(family, 0) + count

family_series = pd.Series(family_counts).sort_values(ascending=False)

for family, count in family_series.items():
    pct = count / total_attack * 100 if total_attack > 0 else 0
    print(f"  {family}: {count:,} ({pct:.2f}%)")

# Rare attack warnings. The cut-off is the same threshold Phase 2 uses to
# keep a class in full, so the warning and the sampling stay in agreement.
rare_threshold = CONFIG['min_samples_keep_all']
print(f"\n--- Rare Attack Warnings (<{rare_threshold} samples) ---")
for label, count in label_series.items():
    if label != 'BENIGN' and count < rare_threshold:
        print(f"  ⚠ {label}: {count} samples")

In [None]:
# =============================================================================
# VISUALIZATIONS
# =============================================================================

# One row of three panels: binary balance, per-label counts, rows per file.
fig, axes = plt.subplots(1, 3, figsize=(16, 5))
ax_binary, ax_labels, ax_files = axes

# Panel 1: BENIGN vs ATTACK share, with the attack wedge pulled out slightly.
ax_binary.pie(
    [total_benign, total_attack],
    labels=['BENIGN', 'ATTACK'],
    autopct='%1.1f%%',
    colors=['#2ecc71', '#e74c3c'],
    startangle=90,
    explode=[0, 0.05],
)
ax_binary.set_title('Stage 1: Binary Balance')

# Panel 2: every label on a log axis so rare classes stay visible.
labels_ascending = label_series.sort_values(ascending=True)
colors_bar = []
for label_name in labels_ascending.index:
    colors_bar.append('#2ecc71' if label_name == 'BENIGN' else '#e74c3c')
labels_ascending.plot(kind='barh', ax=ax_labels, color=colors_bar)
ax_labels.set_xscale('log')
ax_labels.set_xlabel('Count (log scale)')
ax_labels.set_title('All Labels Distribution')

# Panel 3: number of rows in each source file.
df_health.sort_values('n_rows', ascending=True).plot(
    kind='barh',
    x='category',
    y='n_rows',
    ax=ax_files,
    color='#3498db',
    legend=False,
)
ax_files.set_xlabel('Number of Rows')
ax_files.set_title('Rows per File')

plt.tight_layout()
overview_path = os.path.join(OUTPUT_DIR, 'data_overview.png')
plt.savefig(overview_path, dpi=150, bbox_inches='tight')
plt.show()

print(f"✓ Saved to: {overview_path}")

---
# Phase 2: Build Stratified Global Sample

In [None]:
# =============================================================================
# BUILD STRATIFIED GLOBAL SAMPLE
# =============================================================================

print("=" * 60)
print("PHASE 2: BUILDING STRATIFIED GLOBAL SAMPLE")
print("=" * 60)

SAMPLE_FRAC = CONFIG['sample_fraction']
MIN_KEEP_ALL = CONFIG['min_samples_keep_all']
SEED = CONFIG['random_seed']

# One sampled frame per file; merged after the loop.
global_samples = []

for filepath in FILE_PATHS:
    filename = os.path.basename(filepath)
    category = get_file_category(filepath)

    print(f"\nSampling: {filename}...")

    try:
        df = normalize_labels_in_df(
            clean_column_names(pd.read_csv(filepath, low_memory=False)),
            'Label',
        )  # NORMALIZE LABELS
        original_rows = len(df)

        # Remember which file each row came from.
        df['Source_File'] = category

        if 'Label' not in df.columns:
            # No labels to stratify on: plain random sample of the file.
            sample = df.sample(frac=SAMPLE_FRAC, random_state=SEED)
        else:
            # Per label: keep small classes whole, subsample the rest.
            per_label_parts = []
            for label, group in df.groupby('Label'):
                if len(group) >= MIN_KEEP_ALL:
                    per_label_parts.append(
                        group.sample(frac=SAMPLE_FRAC, random_state=SEED)
                    )
                    continue
                per_label_parts.append(group)
                print(f"  - {label}: keeping all {len(group)} samples (rare class)")
            sample = pd.concat(per_label_parts, ignore_index=True)

        print(f"  Total: {original_rows:,} → {len(sample):,} ({len(sample)/original_rows*100:.1f}%)")
        global_samples.append(sample)

        # Release the full frame before the next file is loaded.
        del df
        gc.collect()

    except Exception as e:
        print(f"  ERROR: {e}")

# Merge the per-file samples into one frame.
print("\n--- Merging samples ---")
global_df = pd.concat(global_samples, axis=0, ignore_index=True)
print(f"Global sample shape: {global_df.shape}")

del global_samples
gc.collect()

In [None]:
# =============================================================================
# ADD HIERARCHY LABELS
# =============================================================================

print("\n--- Adding hierarchy labels ---")

# Stage-1 target: 1 for every row whose label is not BENIGN, else 0.
is_attack = global_df['Label'].ne('BENIGN')
global_df['Is_Attack'] = is_attack.astype(int)

# Stage-2 target: attack family looked up from the normalized label.
global_df['Attack_Family'] = global_df['Label'].map(FAMILY_MAP)

# Labels missing from FAMILY_MAP map to NaN; report them and fall back to 'Unknown'.
unmapped_mask = global_df['Attack_Family'].isna()
n_unmapped_rows = unmapped_mask.sum()
if n_unmapped_rows > 0:
    unmapped_labels = global_df.loc[unmapped_mask, 'Label'].unique()
    print(f"\n⚠ WARNING: {n_unmapped_rows} rows have unmapped labels!")
    print(f"  Labels: {unmapped_labels}")
    global_df['Attack_Family'] = global_df['Attack_Family'].fillna('Unknown')

# Show the resulting class distributions.
print("\n--- Verification ---")

is_attack_counts = global_df['Is_Attack'].value_counts()
print("\nIs_Attack distribution:")
print(is_attack_counts.to_string())

family_value_counts = global_df['Attack_Family'].value_counts()
print("\nAttack_Family distribution:")
print(family_value_counts.to_string())

label_value_counts = global_df['Label'].value_counts()
print("\nLabel distribution (sampled):")
print(label_value_counts.to_string())

In [None]:
# =============================================================================
# MEMORY OPTIMIZATION
# =============================================================================

print("\n--- Memory Optimization ---")

memory_before = estimate_memory_mb(global_df)
print(f"Before: {memory_before:.2f} MB")

# Downcast 64-bit numerics: integer columns first, then float columns.
for wide_dtype, downcast_kind in (('int64', 'integer'), ('float64', 'float')):
    for col in global_df.select_dtypes(include=[wide_dtype]).columns:
        global_df[col] = pd.to_numeric(global_df[col], downcast=downcast_kind)

# Store the repetitive string columns as categoricals.
for col in ('Label', 'Attack_Family', 'Source_File'):
    if col not in global_df.columns:
        continue
    global_df[col] = global_df[col].astype('category')

memory_after = estimate_memory_mb(global_df)
print(f"After: {memory_after:.2f} MB")
print(f"Reduction: {(1 - memory_after/memory_before)*100:.1f}%")

In [None]:
# =============================================================================
# SAVE GLOBAL SAMPLE
# =============================================================================

print("\n--- Saving Global Sample ---")

# Write the sampled frame without the index column.
sample_path = os.path.join(OUTPUT_DIR, 'global_sample.csv')
global_df.to_csv(sample_path, index=False)

# On-disk size of the CSV just written, in MB.
bytes_per_mb = 1024 * 1024
file_size_mb = os.path.getsize(sample_path) / bytes_per_mb
n_sample_rows, n_sample_cols = global_df.shape

print(f"✓ Saved to: {sample_path}")
print(f"  Rows: {n_sample_rows:,}")
print(f"  Columns: {n_sample_cols}")
print(f"  File size: {file_size_mb:.2f} MB")

In [None]:
# =============================================================================
# FINAL SUMMARY
# =============================================================================

print("=" * 60)
print("NOTEBOOK 1 COMPLETE: SUMMARY")
print("=" * 60)

# Write the configuration first so that CONFIG.json already exists when the
# output directory is listed below; the confirmation is printed at the end.
config_path = os.path.join(OUTPUT_DIR, 'CONFIG.json')
with open(config_path, 'w', encoding='utf-8') as config_file:
    json.dump(CONFIG, config_file, indent=2)

total_rows_full = df_health['n_rows'].sum()
total_rows_sample = len(global_df)

print("\n--- Dataset Overview ---")
print(f"Files processed: {len(FILE_PATHS)}")
print(f"Total rows (full): {total_rows_full:,}")
print(f"Total rows (sample): {total_rows_sample:,}")
print(f"Effective sample rate: {total_rows_sample/total_rows_full*100:.1f}%")

print("\n--- Data Quality ---")
print(f"Nulls: {df_health['null_total'].sum():,}")
print(f"Infinities: {df_health['inf_total'].sum():,}")
print(f"Negatives: {df_health['neg_total'].sum():,}")

print("\n--- Hierarchical Balance (Sampled Data) ---")
benign_n = (global_df['Is_Attack'] == 0).sum()
attack_n = (global_df['Is_Attack'] == 1).sum()
print(f"Binary: BENIGN={benign_n:,} ({benign_n/len(global_df)*100:.1f}%), ATTACK={attack_n:,} ({attack_n/len(global_df)*100:.1f}%)")
print(f"Families: {global_df['Attack_Family'].nunique()}")
print(f"Attack types: {global_df[global_df['Is_Attack']==1]['Label'].nunique()}")

print("\n--- Output Files ---")
# Sorted so the listing does not depend on the order os.listdir returns.
for output_name in sorted(os.listdir(OUTPUT_DIR)):
    output_path = os.path.join(OUTPUT_DIR, output_name)
    size = os.path.getsize(output_path) / 1024  # KB
    print(f"  {output_name}: {size:.1f} KB")

print("\n--- Next Steps ---")
print("  → Run Notebook 02: Feature Analysis")
print("  → Run Notebook 03: Target Analysis")

print(f"\n✓ Config saved to: {config_path}")

print("\n" + "=" * 60)
print(f"Completed: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print("=" * 60)